Esempio n. 1
0
    def _get_hosts(self):
        """ Return list of hostnames sorted by load. """
        # Query per-host load via `qhost`.
        try:
            proc = ShellProc(self._QHOST, stdout=PIPE)
        except Exception as exc:
            self._logger.error('%r failed: %s' % (self._QHOST, exc))
            return []
        output = proc.stdout.readlines()

        # Collect (hostname, cpu-adjusted load, ncpu) for hosts of interest.
        loads = []
        for line in output:
            if line.startswith(('HOSTNAME', '-')):
                continue  # header / separator rows
            (hostname, arch, ncpu, load,
             memtot, memuse, swapto, swapus) = line.split()
            if self.pattern and \
               not fnmatch.fnmatchcase(hostname, self.pattern):
                continue
            try:
                ncpu = int(ncpu)
                load = float(load)
            except ValueError:
                continue  # non-numeric row (e.g. '-' placeholders)
            loads.append((hostname, load / ncpu, ncpu))

        # Least-loaded hosts first, each repeated once per CPU.
        hosts = []
        for hostname, adjusted_load, ncpu in sorted(loads,
                                                    key=lambda item: item[1]):
            hosts.extend([hostname] * ncpu)
        return hosts
Esempio n. 2
0
def setup_tunnel(address, port):
    """
    Setup tunnel to `address` and `port` assuming:

    - The remote login name matches the local login name.
    - `port` is available on the local host.
    - 'plink' is available on Windows, 'ssh' on other platforms.
    - No user interaction is required to connect via 'plink'/'ssh'.

    address: string
        IPv4 address to tunnel to.

    port: int
        Port at `address` to tunnel to.

    Returns ``(local_address, local_port)``.
    """
    logname = 'tunnel-%s-%d.log' % (address, port)
    logname = os.path.join(os.getcwd(), logname)
    stdout = open(logname, 'w')

    user = getpass.getuser()
    if sys.platform == 'win32':  # pragma no cover
        stdin = open('nul:', 'r')
        args = ['plink', '-ssh', '-l', user,
                '-L', '%d:localhost:%d' % (port, port), address]
    else:
        stdin = open('/dev/null', 'r')
        args = ['ssh', '-l', user,
                '-L', '%d:localhost:%d' % (port, port), address]

    tunnel_proc = ShellProc(args, stdin=stdin, stdout=stdout, stderr=STDOUT)
    local_address = ('127.0.0.1', port)
    for retry in range(20):
        time.sleep(.5)
        exitcode = tunnel_proc.poll()
        if exitcode is not None:
            msg = 'ssh tunnel process exited with exitcode %d,' \
                  ' output in %s' % (exitcode, logname)
            logging.error(msg)
            raise RuntimeError(msg)
        # Use a fresh socket for each attempt: a socket whose connect()
        # failed may not be reliably reused for another connect().
        sock = socket.socket(socket.AF_INET)
        try:
            sock.connect(local_address)
        except socket.error as exc:
            sock.close()
            # Connection refused / not yet available: keep retrying.
            if exc.args[0] != errno.ECONNREFUSED and \
               exc.args[0] != errno.ENOENT:
                raise
        else:
            atexit.register(_cleanup_tunnel, tunnel_proc, logname)
            sock.close()
            return local_address

    _cleanup_tunnel(tunnel_proc, logname)
    # `address` still holds the remote host here (no longer shadowed),
    # so the error message names the actual tunnel target.
    raise RuntimeError('Timeout trying to connect through tunnel to %s'
                       % address)
Esempio n. 3
0
def _unused_remote_port(address, port, user, identity):
    """ Return a (currently) unused port on `address`, default to `port`. """
    # A 'user@host' address overrides the `user` argument.
    if '@' in address:
        user, host = address.split('@')
    else:
        host = address
        user = user or getpass.getuser()

    if sys.platform == 'win32':  # pragma no cover
        cmd = ['plink', '-batch', '-ssh']
    else:
        cmd = ['ssh']

    cmd.extend(('-l', user))
    if identity:
        cmd.extend(('-i', identity))
    cmd.extend(('-x', '-T'))

    # FIXME: this currently won't work for Windows if ssh doesn't connect to a
    # UNIX-like shell (cygwin, etc.)
    code = '''"import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(('localhost', 0))
port = sock.getsockname()[1]
sock.close()
print 'port', port"'''

    # Collapse the snippet to one line and run it remotely.
    cmd.extend([host, 'python', '-c', code.replace('\n', ';')])
    try:
        proc = ShellProc(cmd, stdout=PIPE, stderr=PIPE,
                         universal_newlines=True)
    except Exception as exc:
        logging.warning("Can't get unused port on %s from %s (forcing %s): %s",
                        host, cmd, port, exc)
        return port

    # Scan remote output for the reported port number.
    output = proc.stdout.read()
    for line in output.split('\n'):
        if not line.startswith('port'):
            continue
        remote_port = int(line.split()[1])
        logging.debug('Unused remote port %s on %s', remote_port, host)
        return remote_port

    logging.warning(
        "Can't get unused port on %s from %s (forcing %s):\n"
        "[stdout]\n%s\n[stderr]\n%s", host, cmd, port, output,
        proc.stderr.read())
    return port
Esempio n. 4
0
    def test_errormsg(self):
        logging.debug('')
        logging.debug('test_errormsg')

        if sys.platform == 'win32':
            cmd = 'dir'
        else:
            cmd = 'ls'
        try:
            proc = ShellProc(cmd, stdout='stdout', stderr='stderr')
            proc.wait()
        finally:
            # Remove the redirection files whether or not the run succeeded.
            for name in ('stdout', 'stderr'):
                if os.path.exists(name):
                    os.remove(name)

        msg = proc.error_message(-signal.SIGTERM)
        expected = '' if sys.platform == 'win32' else ': SIGTERM'
        self.assertEqual(msg, expected)
    def _execute_local(self):
        """ Run command. """
        self._logger.info('executing %s...', self.command)
        start_time = time.time()

        self._process = ShellProc(self.command, self.stdin, self.stdout,
                                  self.stderr, self.env_vars)
        self._logger.debug('PID = %d', self._process.pid)

        try:
            return_code, error_msg = self._process.wait(self.poll_delay,
                                                        self.timeout)
        finally:
            # Always release file handles and drop the process reference.
            self._process.close_files()
            self._process = None

        elapsed = time.time() - start_time
        if elapsed >= 60:  #pragma no cover
            self._logger.info('elapsed time: %.1f sec.', elapsed)

        return (return_code, error_msg)
Esempio n. 6
0
def _start_tunnel(address, port, args, user, identity, prefix):
    """ Start an ssh tunnel process. """
    # A 'user@host' address overrides the `user` argument.
    if '@' in address:
        user, host = address.split('@')
    else:
        user = user or getpass.getuser()
        host = address

    if sys.platform == 'win32':  # pragma no cover
        cmd = ['plink', '-batch', '-ssh']
    else:
        cmd = ['ssh']

    cmd += ['-l', user]
    if identity:
        cmd += ['-i', identity]
    cmd += ['-N', '-x', '-T']  # plink doesn't support '-n' (no stdin)
    cmd += args + [host]

    logname = '%s-%s-%s.log' % (prefix, host, port)
    logname = os.path.join(os.getcwd(), logname)
    stdout = open(logname, 'w')

    try:
        tunnel_proc = ShellProc(cmd, stdout=stdout, stderr=STDOUT)
    except Exception as exc:
        stdout.close()  # Don't leak the log file handle on failure.
        raise RuntimeError("Can't create ssh tunnel process from %s: %s"
                           % (cmd, exc))
    time.sleep(1)
    exitcode = tunnel_proc.poll()
    if exitcode is not None:
        stdout.close()  # Process is gone; release the log file handle.
        raise RuntimeError('ssh tunnel process for %s:%s exited with exitcode'
                           ' %d, output in %s'
                           % (address, port, exitcode, logname))

    # Caller is responsible for invoking the returned cleanup function.
    return (_cleanup_tunnel, tunnel_proc, stdout, logname, os.getpid())
Esempio n. 7
0
    def execute_command(self, resource_desc):
        """
        Submit command based on `resource_desc`.

        resource_desc: dict
            Description of command and required resources.

        The '-V' `qsub` option is always used to export the current environment
        to the job. This environment is first updated with any 'job_environment'
        data. The '-W block=true' `qsub` option is used to wait for job
        completion.

        Other job resource keys are processed as follows:

        ========================= ===========================
        Resource Key              Translation
        ========================= ===========================
        ``submit_as_hold``        -h
        ------------------------- ---------------------------
        rerunnable                -r y|n
        ------------------------- ---------------------------
        ``working_directory``     Handled in generated script
        ------------------------- ---------------------------
        ``job_category``          Ignored
        ------------------------- ---------------------------
        ``min_cpus``              -l select= `value` :ncpus=1
        ------------------------- ---------------------------
        ``max_cpus``              Ignored
        ------------------------- ---------------------------
        ``min_phys_memory``       Ignored
        ------------------------- ---------------------------
        email                     -M `value`
        ------------------------- ---------------------------
        ``email_on_started``      -m b
        ------------------------- ---------------------------
        ``email_on_terminated``   -m e
        ------------------------- ---------------------------
        ``job_name``              -N `value`
        ------------------------- ---------------------------
        ``input_path``            Handled in generated script
        ------------------------- ---------------------------
        ``output_path``           Handled in generated script
        ------------------------- ---------------------------
        ``error_path``            Handled in generated script
        ------------------------- ---------------------------
        ``join_files``            Handled in generated script
        ------------------------- ---------------------------
        ``reservation_id``        Ignored
        ------------------------- ---------------------------
        ``queue_name``            -q `value`
        ------------------------- ---------------------------
        priority                  -p `value`
        ------------------------- ---------------------------
        ``start_time``            -a `value`
        ------------------------- ---------------------------
        ``deadline_time``         Ignored
        ------------------------- ---------------------------
        ``accounting_id``         -W group_list= `value`
        ========================= ===========================

        Where `value` is the corresponding resource value.

        To support a working directory other than HOME or a
        PBS-generated scratch directory, a short script is written with
        PBS directives in the header. The script will change to the working
        directory and then run the command.

        If 'working_directory' is not specified, use current server directory.
        If 'input_path' is not specified, use ``/dev/null``.
        If 'output_path' is not specified, use ``<remote_command>.stdout``.
        If 'error_path' is not specified, use stdout.

        If 'native_specification' is specified, it is added to the `qsub`
        command just before the name of the generated script. If it contains
        a ``select`` clause, then that will prevent generation of a ``select``
        clause related to 'min_cpus'.

        Some resource limits are also handled:

        ==================== =========================
        Resource Key         Translation
        ==================== =========================
        ``core_file_size``   Ignored
        -------------------- -------------------------
        ``data_seg_size``    Ignored
        -------------------- -------------------------
        ``file_size``        Ignored
        -------------------- -------------------------
        ``open_files``       Ignored
        -------------------- -------------------------
        ``stack_size``       Ignored
        -------------------- -------------------------
        ``virtual_memory``   Ignored
        -------------------- -------------------------
        ``cpu_time``         Ignored
        -------------------- -------------------------
        ``wallclock_time``   -l walltime= `value`
        ==================== =========================

        Output from `qsub` itself is routed to ``qsub.out``.
        If the job reports an error, ``qsub.out`` will be appended to either
        `error_path`, or if that was not specified, stdout.
        """
        self.home_dir = os.path.expanduser('~')
        self.work_dir = ''

        # Base qsub options: export environment (-V), wait for job completion
        # (-W block=true), and '-j oe' (join stderr into stdout at PBS level).
        cmd = list(self._QSUB)
        cmd.extend(('-V', '-W', 'block=true', '-j', 'oe'))
        if sys.platform == 'win32':  # pragma no cover
            # '-C' sets the directive prefix scanned for in the script header.
            prefix = 'REM PBS'
            cmd.extend(('-C', '"%s"' % prefix))
            suffix = '-qsub.bat'
        else:
            prefix = '#PBS'
            cmd.extend(('-S', '/bin/sh'))
            suffix = '.qsub'
        # Filled in from resource_desc while scanning keys below.
        env = None
        inp, out, err = None, None, None
        join_files = False

        # Set working directory now, for possible path fixing.
        try:
            value = resource_desc['working_directory']
        except KeyError:
            pass
        else:
            self.work_dir = self._fix_path(value)

        # Write script to be submitted rather than putting everything on
        # 'qsub' command line. We have to do this since otherwise there's
        # no way to set an execution directory or input path.
        base = None
        if 'job_name' in resource_desc:
            base = self._jobname(resource_desc['job_name'])
        if not base:
            base = os.path.basename(resource_desc['remote_command'])
        script_name = '%s%s' % (base, suffix)

        native_specification = resource_desc.get('native_specification', [])

        with open(script_name, 'w') as script:
            if sys.platform == 'win32':  # pragma no cover
                script.write('@echo off\n')
            else:
                script.write('#!/bin/sh\n')

            # PBS (at least at NAS) requires 'group_list' be set.
            if 'accounting_id' in resource_desc:
                accounting_id = resource_desc['accounting_id']
            else:
                accounting_id = self.accounting_id
            script.write('%s -W group_list=%s\n' %
                         (prefix, accounting_id.strip()))

            # Process description in fixed, repeatable order.
            keys = ('submit_as_hold', 'rerunnable', 'job_environment',
                    'min_cpus', 'email', 'email_on_started',
                    'email_on_terminated', 'job_name', 'input_path',
                    'output_path', 'error_path', 'join_files', 'queue_name',
                    'priority', 'start_time')

            email_events = ''
            for key in keys:
                try:
                    value = resource_desc[key]
                except KeyError:
                    continue

                if key == 'submit_as_hold':
                    if value:
                        script.write('%s -h\n' % prefix)
                elif key == 'rerunnable':
                    script.write('%s -r %s\n' %
                                 (prefix, 'y' if value else 'n'))
                elif key == 'job_environment':
                    # Deferred: passed to ShellProc when submitting below.
                    env = value
                elif key == 'min_cpus':
                    # Only write select clause if not in 'native_specification'.
                    for arg in native_specification:
                        if 'select' in arg:
                            break
                    else:
                        script.write('%s -l select=%d:ncpus=1\n' %
                                     (prefix, value))
                elif key == 'email':
                    script.write('%s -M %s\n' % (prefix, ','.join(value)))
                elif key == 'email_on_started':
                    email_events += 'b'
                elif key == 'email_on_terminated':
                    email_events += 'e'
                elif key == 'job_name':
                    value = value or base
                    script.write('%s -N %s\n' % (prefix, self._jobname(value)))
                elif key == 'input_path':
                    # I/O paths are applied as shell redirections at the end.
                    inp = value
                elif key == 'output_path':
                    out = value
                elif key == 'error_path':
                    err = value
                elif key == 'join_files':
                    join_files = value
                elif key == 'queue_name':
                    script.write('%s -q %s\n' % (prefix, value))
                elif key == 'priority':
                    script.write('%s -p %d\n' % (prefix, value))
                elif key == 'start_time':
                    script.write('%s -a %s\n' %
                                 (prefix, value.strftime('%Y%m%d%H%M.%S')))

            if email_events:
                script.write('%s -m %s\n' % (prefix, email_events))

            # Set resource limits.
            if 'resource_limits' in resource_desc:
                limits = resource_desc['resource_limits']
                if 'wallclock_time' in limits:
                    wall_time = limits['wallclock_time']
                    script.write('%s -l walltime=%s\n' %
                                 (prefix, self._timelimit(wall_time)))

            # Have script move to work directory relative to
            # home directory on execution host.
            home = os.path.realpath(os.path.expanduser('~'))
            work = os.path.realpath(self.work_dir or os.getcwd())
            if work.startswith(home):
                work = work[len(home) + 1:]
                if sys.platform == 'win32':  # pragma no cover
                    script.write('cd %HOMEDRIVE%%HOMEPATH%\n')
                else:
                    script.write('cd $HOME\n')
            else:
                # This can potentially cause problems...
                self._logger.warning('work %r not a descendant of home %r',
                                     work, home)
            if ' ' in work:
                work = '"%s"' % work
            script.write('cd %s\n' % work)

            script.write(self._fix_path(resource_desc['remote_command']))

            if 'args' in resource_desc:
                for arg in resource_desc['args']:
                    arg = self._fix_path(arg)
                    # Quote args containing spaces unless already quoted.
                    if ' ' in arg and arg[0] not in ('"', "'"):
                        arg = '"%s"' % arg
                    script.write(' %s' % arg)

            # Shell redirections implement input/output/error paths.
            script.write(' <%s' % (inp or DEV_NULL))
            script.write(' >%s' % (out or '%s.stdout' % base))
            if join_files or err is None:
                script.write(' 2>&1')
            else:
                script.write(' 2>%s' % err)
            script.write('\n')

        if sys.platform != 'win32':
            # Owner rwx; Python 2 octal literal.
            os.chmod(script_name, 0700)

        # Add 'escape' clause.
        cmd.extend(native_specification)

        # Echo the generated script to the log for debugging.
        with open(script_name, 'rU') as inp:
            self._logger.debug('%s:', script_name)
            for line in inp:
                self._logger.debug('    %s', line.rstrip())

        # Submit job.
        cmd.append(os.path.join('.', script_name))
        self._logger.info('%r', ' '.join(cmd))
        try:
            process = ShellProc(cmd, DEV_NULL, 'qsub.out', STDOUT, env)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            if os.path.exists('qsub.out'):
                with open('qsub.out', 'rU') as inp:
                    self._logger.error('qsub.out:')
                    for line in inp:
                        self._logger.error('    %s', line.rstrip())
            raise
Esempio n. 8
0
    def execute_command(self, resource_desc):
        """
        Submit command based on `resource_desc`.

        resource_desc: dict
            Description of command and required resources.

        The '-V' `qsub` option is always used to export the current environment
        to the job. This environment is first updated with any 'job_environment'
        data. The '-sync yes' `qsub` option is used to wait for job completion.

        Other job resource keys are processed as follows:

        ========================= =========================
        Resource Key              Translation
        ========================= =========================
        ``submit_as_hold``        -h
        ------------------------- -------------------------
        rerunnable                -r yes|no
        ------------------------- -------------------------
        ``working_directory``     -wd `value`
        ------------------------- -------------------------
        ``job_category``          Sets parallel environment
        ------------------------- -------------------------
        ``min_cpus``              Sets parallel environment
        ------------------------- -------------------------
        ``max_cpus``              Sets parallel environment
        ------------------------- -------------------------
        ``min_phys_memory``       Ignored
        ------------------------- -------------------------
        email                     -M `value`
        ------------------------- -------------------------
        ``email_on_started``      -m b
        ------------------------- -------------------------
        ``email_on_terminated``   -m e
        ------------------------- -------------------------
        ``job_name``              -N `value`
        ------------------------- -------------------------
        ``input_path``            -i `value`
        ------------------------- -------------------------
        ``output_path``           -o `value`
        ------------------------- -------------------------
        ``error_path``            -e `value`
        ------------------------- -------------------------
        ``join_files``            -j yes|no
        ------------------------- -------------------------
        ``reservation_id``        -ar `value`
        ------------------------- -------------------------
        ``queue_name``            -q `value`
        ------------------------- -------------------------
        priority                  -p `value`
        ------------------------- -------------------------
        ``start_time``            -a `value`
        ------------------------- -------------------------
        ``deadline_time``         Ignored
        ------------------------- -------------------------
        ``accounting_id``         -A `value`
        ========================= =========================

        Where `value` is the corresponding resource value.

        If 'working_directory' is not specified, add ``-cwd``.
        If 'input_path' is not specified, add ``-i /dev/null``.
        If 'output_path' is not specified, add ``-o <remote_command>.stdout``.
        If 'error_path' is not specified, add ``-j yes``.

        If 'native_specification' is specified, it is added to the `qsub`
        command just before 'remote_command' and 'args'.

        If specified, 'job_category' is used to index into the category
        map set up during allocator configuration.  The mapped category
        name as well as the 'min_cpus' and 'max_cpus' values are used
        with the ``-pe`` qsub option.

        Some resource limits are also handled:

        ==================== =========================
        Resource Key         Translation
        ==================== =========================
        ``core_file_size``   Ignored
        -------------------- -------------------------
        ``data_seg_size``    Ignored
        -------------------- -------------------------
        ``file_size``        Ignored
        -------------------- -------------------------
        ``open_files``       Ignored
        -------------------- -------------------------
        ``stack_size``       Ignored
        -------------------- -------------------------
        ``virtual_memory``   Ignored
        -------------------- -------------------------
        ``cpu_time``         -l h_cpu= `value`
        -------------------- -------------------------
        ``wallclock_time``   -l h_rt= `value`
        ==================== =========================

        Output from `qsub` itself is routed to ``qsub.out``.
        """
        self.home_dir = os.path.expanduser('~')
        self.work_dir = ''

        # Base qsub options: export environment (-V) and wait for completion
        # ('-sync yes', per docstring).
        # NOTE(review): '-b yes' appears to submit the command directly rather
        # than as a parsed script -- confirm against the qsub man page.
        cmd = list(self._QSUB)
        cmd.extend(('-V', '-sync', 'yes', '-b', 'yes'))
        # Filled in from resource_desc while scanning keys below.
        env = None
        inp, out, err = None, None, None

        # Set working directory now, for possible path fixing.
        try:
            value = resource_desc['working_directory']
        except KeyError:
            pass
        else:
            self.work_dir = self._fix_path(value)
            cmd.extend(('-wd', value))

        # Process description in fixed, repeatable order.
        keys = ('submit_as_hold', 'rerunnable', 'job_environment', 'email',
                'email_on_started', 'email_on_terminated', 'job_name',
                'input_path', 'output_path', 'error_path', 'join_files',
                'reservation_id', 'queue_name', 'priority', 'start_time',
                'accounting_id')

        email_events = ''
        for key in keys:
            try:
                value = resource_desc[key]
            except KeyError:
                continue

            if key == 'submit_as_hold':
                if value:
                    cmd.append('-h')
            elif key == 'rerunnable':
                cmd.extend(('-r', 'yes' if value else 'no'))
            elif key == 'job_environment':
                # Deferred: passed to ShellProc when submitting below.
                env = value
            elif key == 'email':
                cmd.extend(('-M', ','.join(value)))
            elif key == 'email_on_started':
                email_events += 'b'
            elif key == 'email_on_terminated':
                email_events += 'e'
            elif key == 'job_name':
                if value:
                    cmd.extend(('-N', self._jobname(value)))
            elif key == 'input_path':
                cmd.extend(('-i', self._fix_path(value)))
                inp = value
            elif key == 'output_path':
                cmd.extend(('-o', self._fix_path(value)))
                out = value
            elif key == 'error_path':
                cmd.extend(('-e', self._fix_path(value)))
                err = value
            elif key == 'join_files':
                cmd.extend(('-j', 'yes' if value else 'no'))
                if value:
                    # Mark stderr as handled so the default '-j yes'
                    # below is not added.
                    err = 'yes'
            elif key == 'reservation_id':
                cmd.extend(('-ar', value))
            elif key == 'queue_name':
                cmd.extend(('-q', value))
            elif key == 'priority':
                cmd.extend(('-p', str(value)))
            elif key == 'start_time':
                cmd.extend(('-a', value.strftime('%Y%m%d%H%M.%S')))
            elif key == 'accounting_id':
                cmd.extend(('-A', value))

        if email_events:
            cmd.extend(('-m', email_events))

        # Setup parallel environment.
        if 'job_category' in resource_desc:
            job_category = resource_desc['job_category']
            try:
                parallel_environment = self.category_map[job_category]
            except KeyError:
                msg = 'No mapping for job_category %r' % job_category
                self._logger.error(msg)
                raise ValueError(msg)
            min_cpus = resource_desc.get('min_cpus', 1)
            max_cpus = resource_desc.get('max_cpus', min_cpus)
            cmd.extend(
                ('-pe', parallel_environment, '%d-%d' % (min_cpus, max_cpus)))

        # Set resource limits.
        if 'resource_limits' in resource_desc:
            limits = resource_desc['resource_limits']
            if 'cpu_time' in limits:
                cpu_time = limits['cpu_time']
                cmd.extend(('-l', 'h_cpu=%s' % self._timelimit(cpu_time)))
            if 'wallclock_time' in limits:
                wall_time = limits['wallclock_time']
                cmd.extend(('-l', 'h_rt=%s' % self._timelimit(wall_time)))

        # Set default command configuration.
        if not self.work_dir:
            cmd.append('-cwd')
        if inp is None:
            cmd.extend(('-i', DEV_NULL))
        if out is None:
            base = os.path.basename(resource_desc['remote_command'])
            cmd.extend(('-o', '%s.stdout' % base))
        if err is None:
            cmd.extend(('-j', 'yes'))

        # Add 'escape' clause.
        if 'native_specification' in resource_desc:
            cmd.extend(resource_desc['native_specification'])

        cmd.append(self._fix_path(resource_desc['remote_command']))

        if 'args' in resource_desc:
            for arg in resource_desc['args']:
                cmd.append(self._fix_path(arg))

        # Submit job; qsub's own output goes to 'qsub.out'.
        self._logger.info('%r', ' '.join(cmd))
        try:
            process = ShellProc(cmd, DEV_NULL, 'qsub.out', STDOUT, env)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        # Submitted; wait for completion.
        self._logger.debug('    PID = %d', process.pid)
        return_code, error_msg = process.wait(1)
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)
def start_server(authkey='PublicKey',
                 address=None,
                 port=0,
                 prefix='server',
                 allowed_hosts=None,
                 allowed_users=None,
                 allow_shell=False,
                 allowed_types=None,
                 timeout=None,
                 tunnel=False,
                 resources=None,
                 log_prefix=None):
    """
    Start an :class:`ObjServerFactory` service in a separate process
    in the current directory.

    authkey: string
        Authorization key; must be matched by clients.

    address: string
        IPv4 address, hostname, or pipe name.
        Default is the host's default IPv4 address.

    port: int
        Server port (default of 0 implies next available port).
        Note that ports below 1024 typically require special privileges.
        If port is negative, then a local pipe is used for communication.

    prefix: string
        Prefix for server config file and stdout/stderr file.

    allowed_hosts: list(string)
        Host address patterns to check against. Required if `port` >= 0.
        Ignored if `allowed_users` is specified.

    allowed_users: dict
        Dictionary of users and corresponding public keys allowed access.
        If None, *any* user may access. If empty, no user may access.
        The host portions of user strings are used for address patterns.

    allow_shell: bool
        If True, :meth:`execute_command` and :meth:`load_model` are allowed.
        Use with caution!

    allowed_types: list(string)
        Names of types which may be created. If None, then allow types listed
        by :meth:`get_available_types`. If empty, no types are allowed.

    timeout: int
        Seconds to wait for server to start. Note that public key generation
        can take a while. The default value of None will use an internally
        computed value based on host type (and for Windows, the availability
        of pyWin32).

    tunnel: bool
        If True, report host IP address but listen for connections from a
        local SSH tunnel.

    resources: string
        Filename for resource configuration.

    log_prefix: string
        Name used to identify remote logging messages from server.
        Implies that the local process will be receiving the messages.

    Returns ``(server_proc, config_filename)``.
    """
    # Public key generation on Windows without pyWin32 is much slower,
    # so allow considerably more startup time there.
    if timeout is None:
        if sys.platform == 'win32' and not HAVE_PYWIN32:  # pragma no cover
            timeout = 120
        else:
            timeout = 30

    server_key = prefix + '.key'
    server_cfg = prefix + '.cfg'
    server_out = prefix + '.out'
    # Remove stale files so the wait loop below can't pick up an old config.
    for path in (server_cfg, server_out):
        if os.path.exists(path):
            os.remove(path)

    # The server process reads its authkey from this file; it is deleted
    # in the 'finally' clause below once startup has been resolved.
    with open(server_key, 'w') as out:
        out.write('%s\n' % authkey)

    factory_path = pkg_resources.resource_filename('openmdao.main',
                                                   'objserverfactory.py')
    # Launch with the same interpreter running this process rather than
    # whatever 'python' happens to be first on PATH.
    args = [sys.executable, factory_path,
            '--port', str(port), '--prefix', prefix]

    if address is not None:
        args.extend(['--address', address])

    if tunnel:
        args.append('--tunnel')

    if resources is not None:
        args.append('--resources')
        args.append(resources)

    if allowed_users is not None:
        write_authorized_keys(allowed_users, 'users.allow',
                              logging.getLogger())
        args.extend(['--users', 'users.allow'])
    else:
        args.append('--allow-public')
        if port >= 0:
            # No user list: fall back to host-pattern access control.
            if allowed_hosts is None:
                allowed_hosts = [socket.gethostbyname(socket.gethostname())]
                # Ensure loopback access works when the default address
                # resolves to a 127.x address.
                if allowed_hosts[0].startswith('127.') and \
                                '127.0.0.1' not in allowed_hosts:
                    allowed_hosts.append('127.0.0.1')
            with open('hosts.allow', 'w') as out:
                for pattern in allowed_hosts:
                    out.write('%s\n' % pattern)
            if sys.platform != 'win32' or HAVE_PYWIN32:
                make_private('hosts.allow')
            else:  # pragma no cover
                logging.warning("Can't make hosts.allow private")

    if allow_shell:
        args.append('--allow-shell')

    if allowed_types is not None:
        with open('types.allow', 'w') as out:
            for typname in allowed_types:
                out.write('%s\n' % typname)
        if sys.platform != 'win32' or HAVE_PYWIN32:
            make_private('types.allow')
        else:  # pragma no cover
            logging.warning("Can't make types.allow private")
        args.extend(['--types', 'types.allow'])

    if log_prefix is not None:
        log_host = socket.gethostname()
        log_port = logging_port(log_host, log_host)
        args.extend(['--log-host', log_host, '--log-port', str(log_port)])
        if log_prefix:  # Could be null (for default).
            args.extend(['--log-prefix', log_prefix])

    proc = ShellProc(args, stdout=server_out, stderr=STDOUT)

    try:
        # Wait for a valid (non-empty) server_cfg file; the server writes
        # it only once it is ready to accept connections.
        retry = 0
        while (not os.path.exists(server_cfg)) or \
                (os.path.getsize(server_cfg) == 0):
            return_code = proc.poll()
            if return_code:
                error_msg = proc.error_message(return_code)
                raise RuntimeError('Server startup failed %s' % error_msg)
            retry += 1
            # Poll at 0.1 second intervals, i.e. 'timeout' seconds total.
            if retry < 10 * timeout:
                time.sleep(.1)
            # Hard to cause a startup timeout.
            else:  # pragma no cover
                proc.terminate(timeout)
                raise RuntimeError('Server startup timeout')
        return (proc, server_cfg)
    finally:
        # Never leave the authkey on disk, whether startup succeeded or not.
        if os.path.exists(server_key):
            os.remove(server_key)
    def execute_command(self, resource_desc):
        """
        Run the command described by `resource_desc` in a subprocess,
        provided this server's `allow_shell` attribute is True.

        resource_desc: dict
            Contains job description.

        The current environment, along with any 'job_environment'
        specification, is in effect while running 'remote_command'.

        If 'input_path' is not specified, ``/dev/null`` or ``nul:`` is used.
        If 'output_path' is not specified, ``<remote_command>.stdout`` is used.
        If neither 'error_path' nor 'join_files' are specified,
        ``<remote_command>.stderr`` is used.

        If specified in the 'resource_limits' dictionary, 'wallclock_time' is
        used as a timeout.

        All other queuing resource keys are ignored.

        The ``HOME_DIRECTORY`` and ``WORKING_DIRECTORY`` placeholders are
        ignored.
        """
        job_name = resource_desc.get('job_name', '')

        executable = resource_desc['remote_command']
        self._check_path(executable, 'execute_command')
        base = os.path.basename(executable)
        cmd_line = [executable] + list(resource_desc.get('args', []))

        self._logger.debug('execute_command %s %r', job_name, cmd_line)
        # Refuse (after logging the attempt) unless shell access is enabled.
        if not self._allow_shell:
            self._logger.error('attempt to execute %r by %r', cmd_line,
                               get_credentials().user)
            raise RuntimeError('shell access is not allowed by this server')

        env_vars = resource_desc.get('job_environment')

        # Standard stream redirections; any explicitly-supplied path must
        # pass the server's path check.
        if 'input_path' in resource_desc:
            stdin = resource_desc['input_path']
            self._check_path(stdin, 'execute_command')
        else:
            stdin = DEV_NULL

        if 'output_path' in resource_desc:
            stdout = resource_desc['output_path']
            self._check_path(stdout, 'execute_command')
        else:
            stdout = base + '.stdout'

        if 'error_path' in resource_desc:
            stderr = resource_desc['error_path']
            self._check_path(stderr, 'execute_command')
        elif resource_desc.get('join_files'):
            stderr = STDOUT
        else:
            stderr = base + '.stderr'

        # Only 'wallclock_time' is honored; 0 means no timeout.
        limits = resource_desc.get('resource_limits', {})
        timeout = limits.get('wallclock_time', 0)
        poll_delay = 1

        try:
            process = ShellProc(cmd_line, stdin, stdout, stderr, env_vars)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        self._logger.debug('    PID = %d', process.pid)
        return_code, error_msg = process.wait(poll_delay, timeout)
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)
Esempio n. 11
0
    def execute_command(self, resource_desc):
        """
        Submit a `qsub` job built from `resource_desc`.

        resource_desc: dict
            Description of command and required resources.

        The '-V' `qsub` option is always used to export the current environment
        to the job. This environment is first updated with any 'job_environment'
        data. The '-sync yes' `qsub` option is used to wait for job completion.

        Other job resource keys are processed as follows:

        ========================= ====================
        Resource Key              Translation
        ========================= ====================
        job_name                  -N `value`
        ------------------------- --------------------
        working_directory         -wd `value`
        ------------------------- --------------------
        parallel_environment      -pe `value` `n_cpus`
        ------------------------- --------------------
        input_path                -i `value`
        ------------------------- --------------------
        output_path               -o `value`
        ------------------------- --------------------
        error_path                -e `value`
        ------------------------- --------------------
        join_files                -j yes|no
        ------------------------- --------------------
        email                     -M `value`
        ------------------------- --------------------
        block_email               -m n
        ------------------------- --------------------
        email_events              -m `value`
        ------------------------- --------------------
        start_time                -a `value`
        ------------------------- --------------------
        deadline_time             Not supported
        ------------------------- --------------------
        hard_wallclock_time_limit -l h_rt= `value`
        ------------------------- --------------------
        soft_wallclock_time_limit -l s_rt= `value`
        ------------------------- --------------------
        hard_run_duration_limit   -l h_cpu= `value`
        ------------------------- --------------------
        soft_run_duration_limit   -l s_cpu= `value`
        ------------------------- --------------------
        job_category              Not supported
        ========================= ====================

        Where `value` is the corresponding resource value and
        `n_cpus` is the value of the 'n_cpus' resource, or 1.

        If 'working_directory' is not specified, add ``-cwd``.
        If 'input_path' is not specified, add ``-i /dev/null``.
        If 'output_path' is not specified, add ``-o <remote_command>.stdout``.
        If 'error_path' is not specified, add ``-j yes``.

        If 'native_specification' is specified, it is added to the `qsub`
        command just before 'remote_command' and 'args'.

        Output from `qsub` itself is routed to ``qsub.out``.
        """
        self.home_dir = os.environ['HOME']
        self.work_dir = ''

        cmd = [self._QSUB, '-V', '-sync', 'yes']
        env = None
        inp = out = err = None

        # Handle the working directory first; _fix_path results can depend
        # on self.work_dir being set.
        if 'working_directory' in resource_desc:
            wd = resource_desc['working_directory']
            self.work_dir = self._fix_path(wd)
            cmd.extend(('-wd', wd))

        # Keys translated to '-l <resource>=<time>' options.
        time_limits = {'hard_wallclock_time_limit': 'h_rt',
                       'soft_wallclock_time_limit': 's_rt',
                       'hard_run_duration_limit': 'h_cpu',
                       'soft_run_duration_limit': 's_cpu'}

        # Process description in fixed, repeatable order.
        for key in ('job_name',
                    'job_environment',
                    'parallel_environment',
                    'input_path',
                    'output_path',
                    'error_path',
                    'join_files',
                    'email',
                    'block_email',
                    'email_events',
                    'start_time',
                    'hard_wallclock_time_limit',
                    'soft_wallclock_time_limit',
                    'hard_run_duration_limit',
                    'soft_run_duration_limit'):
            if key not in resource_desc:
                continue
            value = resource_desc[key]

            if key in time_limits:
                cmd.extend(('-l', '%s=%s' % (time_limits[key],
                                             self._make_time(value))))
            elif key == 'job_name':
                cmd.extend(('-N', value))
            elif key == 'job_environment':
                env = value
            elif key == 'parallel_environment':
                cmd.extend(('-pe', value,
                            str(resource_desc.get('n_cpus', 1))))
            elif key == 'input_path':
                cmd.extend(('-i', self._fix_path(value)))
                inp = value
            elif key == 'output_path':
                cmd.extend(('-o', self._fix_path(value)))
                out = value
            elif key == 'error_path':
                cmd.extend(('-e', self._fix_path(value)))
                err = value
            elif key == 'join_files':
                cmd.extend(('-j', 'yes' if value else 'no'))
                if value:
                    # Joined output means stderr routing is already handled.
                    err = 'yes'
            elif key == 'email':
                cmd.extend(('-M', ','.join(value)))
            elif key == 'block_email':
                if value:
                    cmd.extend(('-m', 'n'))
            elif key == 'email_events':
                cmd.extend(('-m', value))
            elif key == 'start_time':
                cmd.extend(('-a', value))  # May need to translate.

        # Defaults for anything the description left unspecified.
        if not self.work_dir:
            cmd.append('-cwd')

        if inp is None:
            cmd.extend(('-i', '/dev/null'))
        if out is None:
            cmd.extend(('-o', '%s.stdout'
                        % os.path.basename(resource_desc['remote_command'])))
        if err is None:
            cmd.extend(('-j', 'yes'))

        # 'Escape' clause: verbatim extra qsub arguments.
        if 'native_specification' in resource_desc:
            cmd.extend(resource_desc['native_specification'])

        cmd.append(self._fix_path(resource_desc['remote_command']))

        if 'args' in resource_desc:
            cmd.extend(self._fix_path(arg) for arg in resource_desc['args'])

        self._logger.info('%r', ' '.join(cmd))
        try:
            process = ShellProc(cmd, '/dev/null', 'qsub.out', STDOUT, env)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        self._logger.debug('    PID = %d', process.pid)
        return_code, error_msg = process.wait(1)
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)