Esempio n. 1
0
def setup_tunnel(address, port):
    """
    Setup tunnel to `address` and `port` assuming:

    - The remote login name matches the local login name.
    - `port` is available on the local host.
    - 'plink' is available on Windows, 'ssh' on other platforms.
    - No user interaction is required to connect via 'plink'/'ssh'.

    address: string
        IPv4 address to tunnel to.

    port: int
        Port at `address` to tunnel to.

    Returns ``(local_address, local_port)``.

    Raises RuntimeError if the tunnel process exits before a connection
    succeeds, or if no connection through the tunnel succeeds after
    20 attempts spaced 0.5 seconds apart.  Output of the tunnel process is
    written to ``tunnel-<address>-<port>.log`` in the current directory.
    """
    logname = 'tunnel-%s-%d.log' % (address, port)
    logname = os.path.join(os.getcwd(), logname)
    stdout = open(logname, 'w')

    user = getpass.getuser()
    forward = '%d:localhost:%d' % (port, port)
    if sys.platform == 'win32':  # pragma no cover
        stdin = open('nul:', 'r')
        args = ['plink', '-ssh', '-l', user, '-L', forward, address]
    else:
        stdin = open('/dev/null', 'r')
        args = ['ssh', '-l', user, '-L', forward, address]

    try:
        tunnel_proc = ShellProc(args, stdin=stdin, stdout=stdout,
                                stderr=STDOUT)
    except Exception:
        # Nothing was started, so don't leave the stream files open.
        stdin.close()
        stdout.close()
        raise

    # Keep the remote `address` untouched: it is needed for the timeout
    # message below (formatting the 2-tuple with a single '%s' would raise
    # TypeError instead of the intended RuntimeError).
    local_address = ('127.0.0.1', port)
    for retry in range(20):
        time.sleep(.5)
        exitcode = tunnel_proc.poll()
        if exitcode is not None:
            msg = 'ssh tunnel process exited with exitcode %d,' \
                  ' output in %s' % (exitcode, logname)
            logging.error(msg)
            raise RuntimeError(msg)

        # Use a fresh socket for every attempt: the state of a socket after
        # a failed connect() is unspecified, so it must not be reused.
        sock = socket.socket(socket.AF_INET)
        try:
            sock.connect(local_address)
        except socket.error as exc:
            # Only "not listening yet" style failures are retried.
            if exc.args[0] != errno.ECONNREFUSED and \
               exc.args[0] != errno.ENOENT:
                raise
        else:
            atexit.register(_cleanup_tunnel, tunnel_proc, logname)
            return local_address
        finally:
            # The connection was only a probe; always release the socket.
            sock.close()

    _cleanup_tunnel(tunnel_proc, logname)
    raise RuntimeError('Timeout trying to connect through tunnel to %s'
                       % address)
Esempio n. 2
0
    def _get_hosts(self):
        """
        Return list of hostnames sorted by load.

        Runs the ``self._QHOST`` command and parses each output line as the
        eight whitespace-separated columns
        ``hostname arch ncpu load memtot memuse swapto swapus``.
        Hosts are filtered by ``self.pattern`` (if set), ordered by
        ``load / ncpu``, and each hostname is repeated once per CPU.

        Returns an empty list if the command could not be started.  Lines
        that cannot be parsed (wrong column count, non-numeric load or CPU
        count, non-positive CPU count) are skipped.
        """
        # Get host load information.
        try:
            proc = ShellProc(self._QHOST, stdout=PIPE)
        except Exception as exc:
            self._logger.error('%r failed: %s' % (self._QHOST, exc))
            return []
        lines = proc.stdout.readlines()

        # Reduce to hosts we're interested in and sort by CPU-adjusted load.
        loads = []
        for line in lines:
            if line.startswith(('HOSTNAME', '-')):
                continue
            fields = line.split()
            if len(fields) != 8:
                # Blank lines are silently ignored; anything else has an
                # unknown layout, so column positions can't be trusted.
                if fields:
                    self._logger.warning('ignoring unexpected line %r',
                                         line)
                continue
            hostname, ncpu, load = fields[0], fields[2], fields[3]
            if self.pattern:
                if not fnmatch.fnmatchcase(hostname, self.pattern):
                    continue
            try:
                load = float(load)
                ncpu = int(ncpu)
            except ValueError:
                # Placeholder values (not numbers) mean no usable data.
                continue
            if ncpu <= 0:
                # Avoid dividing by zero; such a host offers no CPUs anyway.
                continue
            loads.append((hostname, load / ncpu, ncpu))
        loads.sort(key=lambda item: item[1])

        # Return list of hostnames, one entry per CPU.
        hosts = []
        for hostname, load, ncpu in loads:
            hosts.extend([hostname] * ncpu)
        return hosts
    def test_errormsg(self):
        """
        Check the message ShellProc produces for a SIGTERM return code.

        A trivial directory listing command is run to completion first so
        that a finished process object is available to query.
        """
        logging.debug('')
        logging.debug('test_errormsg')

        on_windows = sys.platform == 'win32'
        if on_windows:
            cmd = 'dir'
        else:
            cmd = 'ls'

        try:
            proc = ShellProc(cmd, stdout='stdout', stderr='stderr')
            proc.wait()
        finally:
            # Remove the scratch files named as the process streams.
            for scratch in ('stdout', 'stderr'):
                if os.path.exists(scratch):
                    os.remove(scratch)

        msg = proc.error_message(-signal.SIGTERM)
        expected = '' if on_windows else ': SIGTERM'
        self.assertEqual(msg, expected)
Esempio n. 4
0
    def test_errormsg(self):
        """
        Check the message ShellProc produces for a SIGTERM return code.

        A trivial directory listing command is run to completion first so
        that a finished process object is available to query.
        """
        logging.debug('')
        logging.debug('test_errormsg')

        on_windows = sys.platform == 'win32'
        if on_windows:
            cmd = 'dir'
        else:
            cmd = 'ls'

        try:
            proc = ShellProc(cmd, stdout='stdout', stderr='stderr')
            proc.wait()
        finally:
            # Remove the scratch files named as the process streams.
            for scratch in ('stdout', 'stderr'):
                if os.path.exists(scratch):
                    os.remove(scratch)

        msg = proc.error_message(-signal.SIGTERM)
        expected = '' if on_windows else ': SIGTERM'
        self.assertEqual(msg, expected)
    def execute_command(self, command, stdin, stdout, stderr, env_vars,
                        poll_delay, timeout):
        """
        Run `command` in a subprocess if this server's `allow_shell`
        attribute is True.

        command: string
            Command line to be executed.

        stdin, stdout, stderr: string
            Filenames for the corresponding stream.

        env_vars: dict
            Environment variables for the command.

        poll_delay: float (seconds)
            Delay between polling subprocess for completion.

        timeout: float (seconds)
            Maximum time to wait for command completion. A value of zero
            implies no timeout.

        Returns ``(return_code, error_msg)`` as produced by waiting on the
        subprocess.

        Raises RuntimeError if shell access is not allowed.  Any exception
        raised while creating the subprocess is logged and re-raised.
        """
        self._logger.debug('execute_command %r', command)
        if not self._allow_shell:
            self._logger.error('attempt to execute %r by %r', command,
                               get_credentials().user)
            raise RuntimeError('shell access is not allowed by this server')

        # Only stream arguments given as filenames need a path check.
        for arg in (stdin, stdout, stderr):
            if isinstance(arg, basestring):
                self._check_path(arg, 'execute_command')
        try:
            process = ShellProc(command, stdin, stdout, stderr, env_vars)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        self._logger.debug('    PID = %d', process.pid)
        try:
            return_code, error_msg = process.wait(poll_delay, timeout)
        finally:
            # Release the stream files held by the process object even if
            # the wait fails, so repeated calls don't accumulate handles.
            process.close_files()
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)
    def _execute_local(self):
        """
        Run `self.command` as a subprocess and wait for it to finish.

        The process object is kept in `self._process` while it runs and
        reset to None afterwards.  Returns ``(return_code, error_msg)``.
        """
        self._logger.info('executing %s...', self.command)
        began = time.time()

        self._process = ShellProc(self.command, self.stdin, self.stdout,
                                  self.stderr, self.env_vars)
        self._logger.debug('PID = %d', self._process.pid)

        try:
            outcome = self._process.wait(self.poll_delay, self.timeout)
            return_code, error_msg = outcome
        finally:
            # Always release the stream files and drop the process reference.
            self._process.close_files()
            self._process = None

        elapsed = time.time() - began
        if elapsed >= 60:  #pragma no cover
            self._logger.info('elapsed time: %.1f sec.', elapsed)

        return (return_code, error_msg)
Esempio n. 7
0
def _unused_remote_port(address, port, user, identity):
    """
    Return a (currently) unused port on `address`, default to `port`.

    address: string
        Remote host, optionally in ``user@host`` form (which overrides
        `user`).

    port: int
        Port returned when the remote query fails.

    user: string
        Remote login name; the local login name is used if this is empty.

    identity: string
        Optional identity file passed to 'ssh'/'plink' via ``-i``.

    A small Python snippet is run on the remote host via 'ssh' ('plink' on
    Windows) which binds a socket to port 0 and prints the port assigned.
    If the process can't be started or no port is reported, a warning is
    logged and `port` is returned.
    """
    if '@' in address:
        user, host = address.split('@')
    else:
        user = user or getpass.getuser()
        host = address

    if sys.platform == 'win32':  # pragma no cover
        cmd = ['plink', '-batch', '-ssh']
    else:
        cmd = ['ssh']

    cmd += ['-l', user]
    if identity:
        cmd += ['-i', identity]
    cmd += ['-x', '-T']

    # FIXME: this currently won't work for Windows if ssh doesn't connect to a
    # UNIX-like shell (cygwin, etc.)
    # The snippet uses the function-call form of print with a single
    # pre-formatted string so that it runs under both Python 2 and Python 3
    # on the remote host.
    code = '''"import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(('localhost', 0))
port = sock.getsockname()[1]
sock.close()
print('port %d' % port)"'''

    cmd += [host, 'python', '-c', code.replace('\n', ';')]
    try:
        proc = ShellProc(cmd,
                         stdout=PIPE,
                         stderr=PIPE,
                         universal_newlines=True)
    except Exception as exc:
        logging.warning("Can't get unused port on %s from %s (forcing %s): %s",
                        host, cmd, port, exc)
        return port

    output = proc.stdout.read()
    for line in output.split('\n'):
        if not line.startswith('port'):
            continue
        try:
            remote_port = int(line.split()[1])
        except (IndexError, ValueError):
            # Some other text (login banner, etc.) that merely starts with
            # 'port'; keep looking for the real report.
            continue
        logging.debug('Unused remote port %s on %s', remote_port, host)
        return remote_port

    logging.warning(
        "Can't get unused port on %s from %s (forcing %s):\n"
        "[stdout]\n%s\n[stderr]\n%s", host, cmd, port, output,
        proc.stderr.read())
    return port
Esempio n. 8
0
def _start_tunnel(address, port, args, user, identity, prefix):
    """
    Start an ssh tunnel process.

    address: string
        Remote host, optionally in ``user@host`` form (which overrides
        `user`).

    port: int
        Port being tunneled; used for the log file name and error messages.

    args: list
        Additional 'ssh'/'plink' arguments, inserted before the host.

    user: string
        Remote login name; the local login name is used if this is empty.

    identity: string
        Optional identity file passed via ``-i``.

    prefix: string
        Prefix for the log file name ``<prefix>-<host>-<port>.log``, which
        is created in the current directory.

    Returns ``(_cleanup_tunnel, tunnel_proc, stdout, logname, pid)`` where
    `stdout` is the open log file and `pid` is this process's id.

    Raises RuntimeError if the process can't be created or if it has
    already exited one second after being started.
    """
    if '@' in address:
        user, host = address.split('@')
    else:
        user = user or getpass.getuser()
        host = address

    if sys.platform == 'win32':  # pragma no cover
        cmd = ['plink', '-batch', '-ssh']
    else:
        cmd = ['ssh']

    cmd += ['-l', user]
    if identity:
        cmd += ['-i', identity]
    cmd += ['-N', '-x', '-T']  # plink doesn't support '-n' (no stdin)
    cmd += args + [host]

    logname = '%s-%s-%s.log' % (prefix, host, port)
    logname = os.path.join(os.getcwd(), logname)
    stdout = open(logname, 'w')

    try:
        tunnel_proc = ShellProc(cmd, stdout=stdout, stderr=STDOUT)
    except Exception as exc:
        # No process will ever write to the log, don't leak the handle.
        stdout.close()
        raise RuntimeError("Can't create ssh tunnel process from %s: %s"
                           % (cmd, exc))

    # Give the process a moment to fail (bad host, authentication, etc.).
    time.sleep(1)
    exitcode = tunnel_proc.poll()
    if exitcode is not None:
        # The caller never receives the log file on this path, so close it
        # here; its contents remain available under `logname`.
        stdout.close()
        raise RuntimeError('ssh tunnel process for %s:%s exited with exitcode'
                           ' %d, output in %s'
                           % (address, port, exitcode, logname))

    return (_cleanup_tunnel, tunnel_proc, stdout, logname, os.getpid())
Esempio n. 9
0
def _start_tunnel(address, port, args, user, identity, prefix):
    """
    Start an ssh tunnel process.

    address: string
        Remote host, optionally in ``user@host`` form (which overrides
        `user`).

    port: int
        Port being tunneled; used for the log file name and error messages.

    args: list
        Additional 'ssh'/'plink' arguments, inserted before the host.

    user: string
        Remote login name; the local login name is used if this is empty.

    identity: string
        Optional identity file passed via ``-i``.

    prefix: string
        Prefix for the log file name ``<prefix>-<host>-<port>.log``, which
        is created in the current directory.

    Returns ``(_cleanup_tunnel, tunnel_proc, stdout, logname, pid)`` where
    `stdout` is the open log file and `pid` is this process's id.

    Raises RuntimeError if the process can't be created or if it has
    already exited one second after being started.
    """
    if "@" in address:
        user, host = address.split("@")
    else:
        user = user or getpass.getuser()
        host = address

    if sys.platform == "win32":  # pragma no cover
        cmd = ["plink", "-batch", "-ssh"]
    else:
        cmd = ["ssh"]

    cmd += ["-l", user]
    if identity:
        cmd += ["-i", identity]
    cmd += ["-N", "-x", "-T"]  # plink doesn't support '-n' (no stdin)
    cmd += args + [host]

    logname = "%s-%s-%s.log" % (prefix, host, port)
    logname = os.path.join(os.getcwd(), logname)
    stdout = open(logname, "w")

    try:
        tunnel_proc = ShellProc(cmd, stdout=stdout, stderr=STDOUT)
    except Exception as exc:
        # No process will ever write to the log, don't leak the handle.
        stdout.close()
        raise RuntimeError("Can't create ssh tunnel process from %s: %s" % (cmd, exc))

    # Give the process a moment to fail (bad host, authentication, etc.).
    time.sleep(1)
    exitcode = tunnel_proc.poll()
    if exitcode is not None:
        # The caller never receives the log file on this path, so close it
        # here; its contents remain available under `logname`.
        stdout.close()
        raise RuntimeError(
            "ssh tunnel process for %s:%s exited with exitcode %d, output in %s"
            % (address, port, exitcode, logname)
        )

    return (_cleanup_tunnel, tunnel_proc, stdout, logname, os.getpid())
Esempio n. 10
0
    def _execute_local(self):
        """
        Run `self.command` as a subprocess and wait for it to finish.

        The process object is kept in `self._process` while it runs and
        reset to None afterwards.  Returns ``(return_code, error_msg)``.
        """
        self._logger.info("executing '%s'...", self.command)
        began = time.time()

        self._process = ShellProc(self.command, self.stdin, self.stdout,
                                  self.stderr, self.env_vars)
        self._logger.debug('PID = %d', self._process.pid)

        try:
            outcome = self._process.wait(self.poll_delay, self.timeout)
            return_code, error_msg = outcome
        finally:
            # Always release the stream files and drop the process reference.
            self._process.close_files()
            self._process = None

        elapsed = time.time() - began
        if elapsed >= 60:  #pragma no cover
            self._logger.info('elapsed time: %.1f sec.', elapsed)

        return (return_code, error_msg)
Esempio n. 11
0
    def execute_command(self, resource_desc):
        """
        Submit command based on `resource_desc`.

        resource_desc: dict
            Description of command and required resources.

        The '-V' `qsub` option is always used to export the current environment
        to the job. This environment is first updated with any 'job_environment'
        data. The '-sync yes' `qsub` option is used to wait for job completion.

        Other job resource keys are processed as follows:

        ========================= =========================
        Resource Key              Translation
        ========================= =========================
        submit_as_hold            -h
        ------------------------- -------------------------
        rerunnable                -r yes|no
        ------------------------- -------------------------
        working_directory         -wd `value`
        ------------------------- -------------------------
        job_category              Sets parallel environment
        ------------------------- -------------------------
        min_cpus                  Sets parallel environment
        ------------------------- -------------------------
        max_cpus                  Sets parallel environment
        ------------------------- -------------------------
        min_phys_memory           Ignored
        ------------------------- -------------------------
        email                     -M `value`
        ------------------------- -------------------------
        email_on_started          -m b
        ------------------------- -------------------------
        email_on_terminated       -m e
        ------------------------- -------------------------
        job_name                  -N `value`
        ------------------------- -------------------------
        input_path                -i `value`
        ------------------------- -------------------------
        output_path               -o `value`
        ------------------------- -------------------------
        error_path                -e `value`
        ------------------------- -------------------------
        join_files                -j yes|no
        ------------------------- -------------------------
        reservation_id            -ar `value`
        ------------------------- -------------------------
        queue_name                -q `value`
        ------------------------- -------------------------
        priority                  -p `value`
        ------------------------- -------------------------
        start_time                -a `value`
        ------------------------- -------------------------
        deadline_time             Ignored
        ------------------------- -------------------------
        accounting_id             -A `value`
        ========================= =========================

        Where `value` is the corresponding resource value.

        'submit_as_hold', 'email_on_started', and 'email_on_terminated'
        only add their option when the corresponding value is true.

        If 'working_directory' is not specified, add ``-cwd``.
        If 'input_path' is not specified, add ``-i /dev/null``.
        If 'output_path' is not specified, add ``-o <remote_command>.stdout``.
        If 'error_path' is not specified, add ``-j yes``.

        If 'native_specification' is specified, it is added to the `qsub`
        command just before 'remote_command' and 'args'.

        If specified, 'job_category' is used to index into the category
        map set up during allocator configuration.  The mapped category
        name as well as the 'min_cpus' and 'max_cpus' values are used
        with the ``-pe`` qsub option.

        Some resource limits are also handled:

        ==================== =========================
        Resource Key         Translation
        ==================== =========================
        core_file_size       Ignored
        -------------------- -------------------------
        data_seg_size        Ignored
        -------------------- -------------------------
        file_size            Ignored
        -------------------- -------------------------
        open_files           Ignored
        -------------------- -------------------------
        stack_size           Ignored
        -------------------- -------------------------
        virtual_memory       Ignored
        -------------------- -------------------------
        cpu_time             -l h_cpu= `value`
        -------------------- -------------------------
        wallclock_time       -l h_rt= `value`
        ==================== =========================

        Output from `qsub` itself is routed to ``qsub.out``.

        Returns ``(return_code, error_msg)`` from waiting on the `qsub`
        process.

        Raises ValueError if 'job_category' has no entry in the category
        map, and KeyError if 'remote_command' is missing.  Any exception
        raised while creating the `qsub` process is logged and re-raised.
        """
        self.home_dir = os.path.expanduser('~')
        self.work_dir = ''

        cmd = list(self._QSUB)
        cmd.extend(('-V', '-sync', 'yes', '-b', 'yes'))
        env = None
        inp, out, err = None, None, None

        # Set working directory now, for possible path fixing.
        try:
            value = resource_desc['working_directory']
        except KeyError:
            pass
        else:
            self.work_dir = self._fix_path(value)
            cmd.extend(('-wd', value))

        # Process description in fixed, repeatable order.
        keys = ('submit_as_hold',
                'rerunnable',
                'job_environment',
                'email',
                'email_on_started',
                'email_on_terminated',
                'job_name',
                'input_path',
                'output_path',
                'error_path',
                'join_files',
                'reservation_id',
                'queue_name',
                'priority',
                'start_time',
                'accounting_id')

        email_events = ''
        for key in keys:
            try:
                value = resource_desc[key]
            except KeyError:
                continue

            if key == 'submit_as_hold':
                if value:
                    cmd.append('-h')
            elif key == 'rerunnable':
                cmd.extend(('-r', 'yes' if value else 'no'))
            elif key == 'job_environment':
                env = value
            elif key == 'email':
                cmd.extend(('-M', ','.join(value)))
            elif key == 'email_on_started':
                # An explicit false value means "don't notify", just like
                # an absent key.
                if value:
                    email_events += 'b'
            elif key == 'email_on_terminated':
                if value:
                    email_events += 'e'
            elif key == 'job_name':
                cmd.extend(('-N', self._jobname(value)))
            elif key == 'input_path':
                cmd.extend(('-i', self._fix_path(value)))
                inp = value
            elif key == 'output_path':
                cmd.extend(('-o', self._fix_path(value)))
                out = value
            elif key == 'error_path':
                cmd.extend(('-e', self._fix_path(value)))
                err = value
            elif key == 'join_files':
                cmd.extend(('-j', 'yes' if value else 'no'))
                if value:
                    # Marks stderr as handled so the default '-j yes'
                    # below isn't added a second time.
                    err = 'yes'
            elif key == 'reservation_id':
                cmd.extend(('-ar', value))
            elif key == 'queue_name':
                cmd.extend(('-q', value))
            elif key == 'priority':
                cmd.extend(('-p', str(value)))
            elif key == 'start_time':
                cmd.extend(('-a', value.strftime('%Y%m%d%H%M.%S')))
            elif key == 'accounting_id':
                cmd.extend(('-A', value))

        if email_events:
            cmd.extend(('-m', email_events))

        # Setup parallel environment.
        if 'job_category' in resource_desc:
            job_category = resource_desc['job_category']
            try:
                parallel_environment = self.category_map[job_category]
            except KeyError:
                msg = 'No mapping for job_category %r' % job_category
                self._logger.error(msg)
                raise ValueError(msg)
            min_cpus = resource_desc.get('min_cpus', 1)
            max_cpus = resource_desc.get('max_cpus', min_cpus)
            cmd.extend(('-pe', parallel_environment,
                        '%d-%d' % (min_cpus, max_cpus)))

        # Set resource limits.
        if 'resource_limits' in resource_desc:
            limits = resource_desc['resource_limits']
            if 'cpu_time' in limits:
                cpu_time = limits['cpu_time']
                cmd.extend(('-l', 'h_cpu=%s' % self._timelimit(cpu_time)))
            if 'wallclock_time' in limits:
                wall_time = limits['wallclock_time']
                cmd.extend(('-l', 'h_rt=%s' % self._timelimit(wall_time)))

        # Set default command configuration.
        if not self.work_dir:
            cmd.append('-cwd')
        if inp is None:
            cmd.extend(('-i', DEV_NULL))
        if out is None:
            base = os.path.basename(resource_desc['remote_command'])
            cmd.extend(('-o', '%s.stdout' % base))
        if err is None:
            cmd.extend(('-j', 'yes'))

        # Add 'escape' clause.
        if 'native_specification' in resource_desc:
            cmd.extend(resource_desc['native_specification'])

        cmd.append(self._fix_path(resource_desc['remote_command']))

        if 'args' in resource_desc:
            for arg in resource_desc['args']:
                cmd.append(self._fix_path(arg))

        self._logger.info('%r', ' '.join(cmd))
        try:
            process = ShellProc(cmd, DEV_NULL, 'qsub.out', STDOUT, env)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        self._logger.debug('    PID = %d', process.pid)
        try:
            return_code, error_msg = process.wait(1)
        finally:
            # Release the stream files held by the process object even if
            # the wait fails, so repeated submissions don't accumulate
            # open handles.
            process.close_files()
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)
Esempio n. 12
0
    def execute_command(self, resource_desc):
        """
        Submit command based on `resource_desc`.

        resource_desc: dict
            Description of command and required resources.

        The '-V' `qsub` option is always used to export the current environment
        to the job. This environment is first updated with any 'job_environment'
        data. The '-W block=true' `qsub` option is used to wait for job
        completion.

        Other job resource keys are processed as follows:

        ========================= ===========================
        Resource Key              Translation
        ========================= ===========================
        ``submit_as_hold``        -h
        ------------------------- ---------------------------
        rerunnable                -r y|n
        ------------------------- ---------------------------
        ``working_directory``     Handled in generated script
        ------------------------- ---------------------------
        ``job_category``          Ignored
        ------------------------- ---------------------------
        ``min_cpus``              -l select= `value` :ncpus=1
        ------------------------- ---------------------------
        ``max_cpus``              Ignored
        ------------------------- ---------------------------
        ``min_phys_memory``       Ignored
        ------------------------- ---------------------------
        email                     -M `value`
        ------------------------- ---------------------------
        ``email_on_started``      -m b
        ------------------------- ---------------------------
        ``email_on_terminated``   -m e
        ------------------------- ---------------------------
        ``job_name``              -N `value`
        ------------------------- ---------------------------
        ``input_path``            Handled in generated script
        ------------------------- ---------------------------
        ``output_path``           Handled in generated script
        ------------------------- ---------------------------
        ``error_path``            Handled in generated script
        ------------------------- ---------------------------
        ``join_files``            Handled in generated script
        ------------------------- ---------------------------
        ``reservation_id``        Ignored
        ------------------------- ---------------------------
        ``queue_name``            -q `value`
        ------------------------- ---------------------------
        priority                  -p `value`
        ------------------------- ---------------------------
        ``start_time``            -a `value`
        ------------------------- ---------------------------
        ``deadline_time``         Ignored
        ------------------------- ---------------------------
        ``accounting_id``         -W group_list= `value`
        ========================= ===========================

        Where `value` is the corresponding resource value.

        To support a working directory other than HOME or a
        PBS-generated scratch directory, a short script is written with
        PBS directives in the header. The script will change to the working
        directory and then run the command.
        
        If 'working_directory' is not specified, use current server directory.
        If 'input_path' is not specified, use ``/dev/null``.
        If 'output_path' is not specified, use ``<remote_command>.stdout``.
        If 'error_path' is not specified, use stdout.

        If 'native_specification' is specified, it is added to the `qsub`
        command just before the name of the generated script. If it contains
        a ``select`` clause, then that will prevent generation of a ``select``
        clause related to 'min_cpus'.

        Some resource limits are also handled:

        ==================== =========================
        Resource Key         Translation
        ==================== =========================
        ``core_file_size``   Ignored
        -------------------- -------------------------
        ``data_seg_size``    Ignored
        -------------------- -------------------------
        ``file_size``        Ignored
        -------------------- -------------------------
        ``open_files``       Ignored
        -------------------- -------------------------
        ``stack_size``       Ignored
        -------------------- -------------------------
        ``virtual_memory``   Ignored
        -------------------- -------------------------
        ``cpu_time``         Ignored
        -------------------- -------------------------
        ``wallclock_time``   -l walltime= `value`
        ==================== =========================

        Output from `qsub` itself is routed to ``qsub.out``.
        If the job reports an error, ``qsub.out`` will be appended to either
        `error_path`, or if that was not specified, stdout.
        """
        self.home_dir = os.path.expanduser('~')
        self.work_dir = ''

        cmd = list(self._QSUB)
        cmd.extend(('-V', '-W', 'block=true', '-j', 'oe'))
        if sys.platform == 'win32':  # pragma no cover
            prefix = 'REM PBS'
            cmd.extend(('-C', '"%s"' % prefix))
            suffix = '-qsub.bat'
        else:
            prefix = '#PBS'
            cmd.extend(('-S', '/bin/sh'))
            suffix = '.qsub'
        env = None
        inp, out, err = None, None, None
        join_files = False

        # Set working directory now, for possible path fixing.
        try:
            value = resource_desc['working_directory']
        except KeyError:
            pass
        else:
            self.work_dir = self._fix_path(value)

        # Write script to be submitted rather than putting everything on
        # 'qsub' command line. We have to do this since otherwise there's
        # no way to set an execution directory or input path.
        base = None
        if 'job_name' in resource_desc:
            base = self._jobname(resource_desc['job_name'])
        if not base:
            base = os.path.basename(resource_desc['remote_command'])
        script_name = '%s%s' % (base, suffix)

        native_specification = resource_desc.get('native_specification', [])

        with open(script_name, 'w') as script:
            if sys.platform == 'win32':  # pragma no cover
                script.write('@echo off\n')
            else:
                script.write('#!/bin/sh\n')

            # PBS (at least at NAS) requires 'group_list' be set.
            if 'accounting_id' in resource_desc:
                accounting_id = resource_desc['accounting_id']
            else:
                accounting_id = self.accounting_id
            script.write('%s -W group_list=%s\n' %
                         (prefix, accounting_id.strip()))

            # Process description in fixed, repeatable order.
            keys = ('submit_as_hold', 'rerunnable', 'job_environment',
                    'min_cpus', 'email', 'email_on_started',
                    'email_on_terminated', 'job_name', 'input_path',
                    'output_path', 'error_path', 'join_files', 'queue_name',
                    'priority', 'start_time')

            email_events = ''
            for key in keys:
                try:
                    value = resource_desc[key]
                except KeyError:
                    continue

                if key == 'submit_as_hold':
                    if value:
                        script.write('%s -h\n' % prefix)
                elif key == 'rerunnable':
                    script.write('%s -r %s\n' %
                                 (prefix, 'y' if value else 'n'))
                elif key == 'job_environment':
                    env = value
                elif key == 'min_cpus':
                    # Only write select clause if not in 'native_specification'.
                    for arg in native_specification:
                        if 'select' in arg:
                            break
                    else:
                        script.write('%s -l select=%d:ncpus=1\n' %
                                     (prefix, value))
                elif key == 'email':
                    script.write('%s -M %s\n' % (prefix, ','.join(value)))
                elif key == 'email_on_started':
                    email_events += 'b'
                elif key == 'email_on_terminated':
                    email_events += 'e'
                elif key == 'job_name':
                    value = value or base
                    script.write('%s -N %s\n' % (prefix, self._jobname(value)))
                elif key == 'input_path':
                    inp = value
                elif key == 'output_path':
                    out = value
                elif key == 'error_path':
                    err = value
                elif key == 'join_files':
                    join_files = value
                elif key == 'queue_name':
                    script.write('%s -q %s\n' % (prefix, value))
                elif key == 'priority':
                    script.write('%s -p %d\n' % (prefix, value))
                elif key == 'start_time':
                    script.write('%s -a %s\n' %
                                 (prefix, value.strftime('%Y%m%d%H%M.%S')))

            if email_events:
                script.write('%s -m %s\n' % (prefix, email_events))

            # Set resource limits.
            if 'resource_limits' in resource_desc:
                limits = resource_desc['resource_limits']
                if 'wallclock_time' in limits:
                    wall_time = limits['wallclock_time']
                    script.write('%s -l walltime=%s\n' %
                                 (prefix, self._timelimit(wall_time)))

            # Have script move to work directory relative to
            # home directory on execution host.
            home = os.path.realpath(os.path.expanduser('~'))
            work = os.path.realpath(self.work_dir or os.getcwd())
            if work.startswith(home):
                work = work[len(home) + 1:]
                if sys.platform == 'win32':  # pragma no cover
                    script.write('cd %HOMEDRIVE%%HOMEPATH%\n')
                else:
                    script.write('cd $HOME\n')
            else:
                # This can potentially cause problems...
                self._logger.warning('work %r not a descendant of home %r',
                                     work, home)
            if ' ' in work:
                work = '"%s"' % work
            script.write('cd %s\n' % work)

            script.write(self._fix_path(resource_desc['remote_command']))

            if 'args' in resource_desc:
                for arg in resource_desc['args']:
                    arg = self._fix_path(arg)
                    if ' ' in arg and arg[0] not in ('"', "'"):
                        arg = '"%s"' % arg
                    script.write(' %s' % arg)

            script.write(' <%s' % (inp or DEV_NULL))
            script.write(' >%s' % (out or '%s.stdout' % base))
            if join_files or err is None:
                script.write(' 2>&1')
            else:
                script.write(' 2>%s' % err)
            script.write('\n')

        if sys.platform != 'win32':
            os.chmod(script_name, 0700)

        # Add 'escape' clause.
        cmd.extend(native_specification)

        with open(script_name, 'rU') as inp:
            self._logger.debug('%s:', script_name)
            for line in inp:
                self._logger.debug('    %s', line.rstrip())

        # Submit job.
        cmd.append(os.path.join('.', script_name))
        self._logger.info('%r', ' '.join(cmd))
        try:
            process = ShellProc(cmd, DEV_NULL, 'qsub.out', STDOUT, env)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            if os.path.exists('qsub.out'):
                with open('qsub.out', 'rU') as inp:
                    self._logger.error('qsub.out:')
                    for line in inp:
                        self._logger.error('    %s', line.rstrip())
            raise

        # Submitted, wait for completion.
        self._logger.debug('    PID = %d', process.pid)
        return_code, error_msg = process.wait(1)
        self._logger.debug('    returning %s', (return_code, error_msg))
        if return_code and os.path.exists('qsub.out'):
            if join_files or err is None:
                qsub_echo = out or '%s.stdout' % base
            else:
                qsub_echo = err
            with open('qsub.out', 'rU') as inp:
                with open(qsub_echo, 'a+') as out:
                    self._logger.error('qsub.out:')
                    out.write('===== qsub.out =====\n')
                    for line in inp:
                        self._logger.error('    %s', line.rstrip())
                        out.write(line)
        return (return_code, error_msg)
Esempio n. 13
0
    def execute_command(self, resource_desc):
        """
        Submit command based on `resource_desc`.

        resource_desc: dict
            Description of command and required resources.

        The '-V' `qsub` option is always used to export the current environment
        to the job. This environment is first updated with any 'job_environment'
        data. The '-sync yes' `qsub` option is used to wait for job completion.

        Other job resource keys are processed as follows:

        ========================= =========================
        Resource Key              Translation
        ========================= =========================
        ``submit_as_hold``        -h
        ------------------------- -------------------------
        ``rerunnable``            -r yes|no
        ------------------------- -------------------------
        ``working_directory``     -wd `value`
        ------------------------- -------------------------
        ``job_category``          Sets parallel environment
        ------------------------- -------------------------
        ``min_cpus``              Sets parallel environment
        ------------------------- -------------------------
        ``max_cpus``              Sets parallel environment
        ------------------------- -------------------------
        ``min_phys_memory``       Ignored
        ------------------------- -------------------------
        ``email``                 -M `value`
        ------------------------- -------------------------
        ``email_on_started``      -m b
        ------------------------- -------------------------
        ``email_on_terminated``   -m e
        ------------------------- -------------------------
        ``job_name``              -N `value`
        ------------------------- -------------------------
        ``input_path``            -i `value`
        ------------------------- -------------------------
        ``output_path``           -o `value`
        ------------------------- -------------------------
        ``error_path``            -e `value`
        ------------------------- -------------------------
        ``join_files``            -j yes|no
        ------------------------- -------------------------
        ``reservation_id``        -ar `value`
        ------------------------- -------------------------
        ``queue_name``            -q `value`
        ------------------------- -------------------------
        ``priority``              -p `value`
        ------------------------- -------------------------
        ``start_time``            -a `value`
        ------------------------- -------------------------
        ``deadline_time``         Ignored
        ------------------------- -------------------------
        ``accounting_id``         -A `value`
        ========================= =========================

        Where `value` is the corresponding resource value.

        The boolean flags 'submit_as_hold', 'email_on_started' and
        'email_on_terminated' only add their option when the value is true.

        If 'working_directory' is not specified, add ``-cwd``.
        If 'input_path' is not specified, add ``-i /dev/null``.
        If 'output_path' is not specified, add ``-o <remote_command>.stdout``.
        If 'error_path' is not specified, add ``-j yes``.

        If 'native_specification' is specified, it is added to the `qsub`
        command just before 'remote_command' and 'args'.

        If specified, 'job_category' is used to index into the category
        map set up during allocator configuration.  The mapped category
        name as well as the 'min_cpus' and 'max_cpus' values are used
        with the ``-pe`` qsub option.

        Some resource limits are also handled:

        ==================== =========================
        Resource Key         Translation
        ==================== =========================
        ``core_file_size``   Ignored
        -------------------- -------------------------
        ``data_seg_size``    Ignored
        -------------------- -------------------------
        ``file_size``        Ignored
        -------------------- -------------------------
        ``open_files``       Ignored
        -------------------- -------------------------
        ``stack_size``       Ignored
        -------------------- -------------------------
        ``virtual_memory``   Ignored
        -------------------- -------------------------
        ``cpu_time``         -l h_cpu= `value`
        -------------------- -------------------------
        ``wallclock_time``   -l h_rt= `value`
        ==================== =========================

        Output from `qsub` itself is routed to ``qsub.out``.

        Returns ``(return_code, error_msg)`` obtained by waiting on the
        `qsub` process.

        Raises :class:`ValueError` if 'job_category' has no entry in the
        category map and :class:`KeyError` if 'remote_command' is missing.
        Any exception raised while creating the process is logged and
        re-raised.
        """
        self.home_dir = os.path.expanduser('~')
        self.work_dir = ''

        cmd = list(self._QSUB)
        cmd.extend(('-V', '-sync', 'yes', '-b', 'yes'))
        env = None
        # These double as 'was it specified' markers for the defaults below.
        inp, out, err = None, None, None

        # Set working directory now, for possible path fixing.
        try:
            value = resource_desc['working_directory']
        except KeyError:
            pass
        else:
            self.work_dir = self._fix_path(value)
            # NOTE(review): the unfixed value is passed to -wd while the
            # fixed one is stored on self; confirm this is intended.
            cmd.extend(('-wd', value))

        # Process description in fixed, repeatable order.
        keys = ('submit_as_hold', 'rerunnable', 'job_environment', 'email',
                'email_on_started', 'email_on_terminated', 'job_name',
                'input_path', 'output_path', 'error_path', 'join_files',
                'reservation_id', 'queue_name', 'priority', 'start_time',
                'accounting_id')

        email_events = ''
        for key in keys:
            try:
                value = resource_desc[key]
            except KeyError:
                continue

            if key == 'submit_as_hold':
                if value:
                    cmd.append('-h')
            elif key == 'rerunnable':
                cmd.extend(('-r', 'yes' if value else 'no'))
            elif key == 'job_environment':
                env = value
            elif key == 'email':
                cmd.extend(('-M', ','.join(value)))
            elif key == 'email_on_started':
                # Honor the flag's value: a False entry must not request mail.
                if value:
                    email_events += 'b'
            elif key == 'email_on_terminated':
                if value:
                    email_events += 'e'
            elif key == 'job_name':
                if value:
                    cmd.extend(('-N', self._jobname(value)))
            elif key == 'input_path':
                cmd.extend(('-i', self._fix_path(value)))
                inp = value
            elif key == 'output_path':
                cmd.extend(('-o', self._fix_path(value)))
                out = value
            elif key == 'error_path':
                cmd.extend(('-e', self._fix_path(value)))
                err = value
            elif key == 'join_files':
                cmd.extend(('-j', 'yes' if value else 'no'))
                if value:
                    # Mark stderr as handled so no second '-j yes' is added.
                    err = 'yes'
            elif key == 'reservation_id':
                cmd.extend(('-ar', value))
            elif key == 'queue_name':
                cmd.extend(('-q', value))
            elif key == 'priority':
                cmd.extend(('-p', str(value)))
            elif key == 'start_time':
                cmd.extend(('-a', value.strftime('%Y%m%d%H%M.%S')))
            elif key == 'accounting_id':
                cmd.extend(('-A', value))

        if email_events:
            cmd.extend(('-m', email_events))

        # Setup parallel environment.
        if 'job_category' in resource_desc:
            job_category = resource_desc['job_category']
            try:
                parallel_environment = self.category_map[job_category]
            except KeyError:
                msg = 'No mapping for job_category %r' % job_category
                self._logger.error(msg)
                raise ValueError(msg)
            min_cpus = resource_desc.get('min_cpus', 1)
            max_cpus = resource_desc.get('max_cpus', min_cpus)
            cmd.extend(
                ('-pe', parallel_environment, '%d-%d' % (min_cpus, max_cpus)))

        # Set resource limits.
        if 'resource_limits' in resource_desc:
            limits = resource_desc['resource_limits']
            if 'cpu_time' in limits:
                cpu_time = limits['cpu_time']
                cmd.extend(('-l', 'h_cpu=%s' % self._timelimit(cpu_time)))
            if 'wallclock_time' in limits:
                wall_time = limits['wallclock_time']
                cmd.extend(('-l', 'h_rt=%s' % self._timelimit(wall_time)))

        # Set default command configuration.
        if not self.work_dir:
            cmd.append('-cwd')
        if inp is None:
            cmd.extend(('-i', DEV_NULL))
        if out is None:
            base = os.path.basename(resource_desc['remote_command'])
            cmd.extend(('-o', '%s.stdout' % base))
        if err is None:
            cmd.extend(('-j', 'yes'))

        # Add 'escape' clause.
        if 'native_specification' in resource_desc:
            cmd.extend(resource_desc['native_specification'])

        cmd.append(self._fix_path(resource_desc['remote_command']))

        if 'args' in resource_desc:
            for arg in resource_desc['args']:
                cmd.append(self._fix_path(arg))

        self._logger.info('%r', ' '.join(cmd))
        try:
            process = ShellProc(cmd, DEV_NULL, 'qsub.out', STDOUT, env)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        self._logger.debug('    PID = %d', process.pid)
        return_code, error_msg = process.wait(1)
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)
    def execute_command(self, resource_desc):
        """
        Submit command based on `resource_desc`.

        resource_desc: dict
            Description of command and required resources.

        The '-V' `qsub` option is always used to export the current environment
        to the job. This environment is first updated with any 'job_environment'
        data. The '-sync yes' `qsub` option is used to wait for job completion.

        Other job resource keys are processed as follows:

        ========================= =========================
        Resource Key              Translation
        ========================= =========================
        ``submit_as_hold``        -h
        ------------------------- -------------------------
        ``rerunnable``            -r yes|no
        ------------------------- -------------------------
        ``working_directory``     Ignored
        ------------------------- -------------------------
        ``job_category``          Sets parallel environment
        ------------------------- -------------------------
        ``min_cpus``              Sets parallel environment
        ------------------------- -------------------------
        ``max_cpus``              Sets parallel environment
        ------------------------- -------------------------
        ``min_phys_memory``       Ignored
        ------------------------- -------------------------
        ``email``                 -M `value`
        ------------------------- -------------------------
        ``email_on_started``      -m b
        ------------------------- -------------------------
        ``email_on_terminated``   -m e
        ------------------------- -------------------------
        ``job_name``              -N `value`
        ------------------------- -------------------------
        ``input_path``            -i `value`
        ------------------------- -------------------------
        ``output_path``           -o `value`
        ------------------------- -------------------------
        ``error_path``            -e `value`
        ------------------------- -------------------------
        ``join_files``            -j yes|no
        ------------------------- -------------------------
        ``reservation_id``        -ar `value`
        ------------------------- -------------------------
        ``queue_name``            -q `value`
        ------------------------- -------------------------
        ``priority``              -p `value`
        ------------------------- -------------------------
        ``start_time``            -a `value`
        ------------------------- -------------------------
        ``deadline_time``         Ignored
        ------------------------- -------------------------
        ``accounting_id``         -A `value`
        ========================= =========================

        Where `value` is the corresponding resource value.

        The boolean flags 'submit_as_hold', 'email_on_started' and
        'email_on_terminated' only add their option when the value is true.

        The 'working_directory' key is ignored since the server has been
        started in this directory.  ``-cwd`` is used in the `qsub` command.

        If 'input_path' is not specified, add ``-i /dev/null``.
        If 'output_path' is not specified, add ``-o <remote_command>.stdout``.
        If 'error_path' is not specified, add ``-j yes``.

        If 'native_specification' is specified, it is added to the `qsub`
        command just before 'remote_command' and 'args'.

        If specified, 'job_category' is used to index into the category
        map set up during allocator configuration.  The mapped category
        name as well as the 'min_cpus' and 'max_cpus' values are used
        with the ``-pe`` qsub option.

        Some resource limits are also handled:

        ==================== =========================
        Resource Key         Translation
        ==================== =========================
        ``core_file_size``   Ignored
        -------------------- -------------------------
        ``data_seg_size``    Ignored
        -------------------- -------------------------
        ``file_size``        Ignored
        -------------------- -------------------------
        ``open_files``       Ignored
        -------------------- -------------------------
        ``stack_size``       Ignored
        -------------------- -------------------------
        ``virtual_memory``   Ignored
        -------------------- -------------------------
        ``cpu_time``         -l h_cpu= `value`
        -------------------- -------------------------
        ``wallclock_time``   -l h_rt= `value`
        ==================== =========================

        Output from `qsub` itself is routed to ``qsub.out``.

        Returns ``(return_code, error_msg)`` obtained by waiting on the
        `qsub` process.

        Raises :class:`ValueError` if 'job_category' has no entry in the
        category map and :class:`KeyError` if 'remote_command' is missing.
        Any exception raised while creating the process is logged and
        re-raised.
        """
        self.home_dir = os.path.expanduser("~")
        self.work_dir = os.getcwd()  # Server started in working directory.

        cmd = list(self._QSUB)
        cmd.extend(("-V", "-sync", "yes", "-b", "yes", "-cwd"))
        env = None
        # These double as 'was it specified' markers for the defaults below.
        inp, out, err = None, None, None

        # Process description in fixed, repeatable order.
        keys = (
            "submit_as_hold",
            "rerunnable",
            "job_environment",
            "email",
            "email_on_started",
            "email_on_terminated",
            "job_name",
            "input_path",
            "output_path",
            "error_path",
            "join_files",
            "reservation_id",
            "queue_name",
            "priority",
            "start_time",
            "accounting_id",
        )

        email_events = ""
        for key in keys:
            try:
                value = resource_desc[key]
            except KeyError:
                continue

            if key == "submit_as_hold":
                if value:
                    cmd.append("-h")
            elif key == "rerunnable":
                cmd.extend(("-r", "yes" if value else "no"))
            elif key == "job_environment":
                env = value
            elif key == "email":
                cmd.extend(("-M", ",".join(value)))
            elif key == "email_on_started":
                # Honor the flag's value: a False entry must not request mail.
                if value:
                    email_events += "b"
            elif key == "email_on_terminated":
                if value:
                    email_events += "e"
            elif key == "job_name":
                if value:
                    cmd.extend(("-N", self._jobname(value)))
            elif key == "input_path":
                cmd.extend(("-i", self._fix_path(value)))
                inp = value
            elif key == "output_path":
                cmd.extend(("-o", self._fix_path(value)))
                out = value
            elif key == "error_path":
                cmd.extend(("-e", self._fix_path(value)))
                err = value
            elif key == "join_files":
                cmd.extend(("-j", "yes" if value else "no"))
                if value:
                    # Mark stderr as handled so no second '-j yes' is added.
                    err = "yes"
            elif key == "reservation_id":
                cmd.extend(("-ar", value))
            elif key == "queue_name":
                cmd.extend(("-q", value))
            elif key == "priority":
                cmd.extend(("-p", str(value)))
            elif key == "start_time":
                cmd.extend(("-a", value.strftime("%Y%m%d%H%M.%S")))
            elif key == "accounting_id":
                cmd.extend(("-A", value))

        if email_events:
            cmd.extend(("-m", email_events))

        # Setup parallel environment.
        if "job_category" in resource_desc:
            job_category = resource_desc["job_category"]
            try:
                parallel_environment = self.category_map[job_category]
            except KeyError:
                msg = "No mapping for job_category %r" % job_category
                self._logger.error(msg)
                raise ValueError(msg)
            min_cpus = resource_desc.get("min_cpus", 1)
            max_cpus = resource_desc.get("max_cpus", min_cpus)
            cmd.extend(("-pe", parallel_environment, "%d-%d" % (min_cpus, max_cpus)))

        # Set resource limits.
        if "resource_limits" in resource_desc:
            limits = resource_desc["resource_limits"]
            if "cpu_time" in limits:
                cpu_time = limits["cpu_time"]
                cmd.extend(("-l", "h_cpu=%s" % self._timelimit(cpu_time)))
            if "wallclock_time" in limits:
                wall_time = limits["wallclock_time"]
                cmd.extend(("-l", "h_rt=%s" % self._timelimit(wall_time)))

        # Set default command configuration.
        if inp is None:
            cmd.extend(("-i", DEV_NULL))
        if out is None:
            base = os.path.basename(resource_desc["remote_command"])
            cmd.extend(("-o", "%s.stdout" % base))
        if err is None:
            cmd.extend(("-j", "yes"))

        # Add 'escape' clause.
        if "native_specification" in resource_desc:
            cmd.extend(resource_desc["native_specification"])

        cmd.append(self._fix_path(resource_desc["remote_command"]))

        if "args" in resource_desc:
            for arg in resource_desc["args"]:
                cmd.append(self._fix_path(arg))

        self._logger.info("%r", " ".join(cmd))
        try:
            process = ShellProc(cmd, DEV_NULL, "qsub.out", STDOUT, env)
        except Exception as exc:
            self._logger.error("exception creating process: %s", exc)
            raise

        self._logger.debug("    PID = %d", process.pid)
        return_code, error_msg = process.wait(1)
        self._logger.debug("    returning %s", (return_code, error_msg))
        return (return_code, error_msg)
def _write_private_list(filename, entries):
    """ Write `entries` one per line to `filename`, then try to make it private. """
    with open(filename, 'w') as out:
        for entry in entries:
            out.write('%s\n' % entry)
    if sys.platform != 'win32' or HAVE_PYWIN32:
        make_private(filename)
    else:  # pragma no cover
        logging.warning("Can't make %s private" % filename)


def start_server(authkey='PublicKey',
                 address=None,
                 port=0,
                 prefix='server',
                 allowed_hosts=None,
                 allowed_users=None,
                 allow_shell=False,
                 allowed_types=None,
                 timeout=None,
                 tunnel=False,
                 resources=None,
                 log_prefix=None):
    """
    Start an :class:`ObjServerFactory` service in a separate process
    in the current directory.

    authkey: string
        Authorization key; must be matched by clients.

    address: string
        IPv4 address, hostname, or pipe name.
        Default is the host's default IPv4 address.

    port: int
        Server port (default of 0 implies next available port).
        Note that ports below 1024 typically require special privileges.
        If port is negative, then a local pipe is used for communication.

    prefix: string
        Prefix for server config file and stdout/stderr file.
        Also used for the temporary ``<prefix>.key`` file holding `authkey`.

    allowed_hosts: list(string)
        Host address patterns to check against. Required if `port` >= 0.
        Ignored if `allowed_users` is specified.

    allowed_users: dict
        Dictionary of users and corresponding public keys allowed access.
        If None, *any* user may access. If empty, no user may access.
        The host portions of user strings are used for address patterns.

    allow_shell: bool
        If True, :meth:`execute_command` and :meth:`load_model` are allowed.
        Use with caution!

    allowed_types: list(string)
        Names of types which may be created. If None, then allow types listed
        by :meth:`get_available_types`. If empty, no types are allowed.

    timeout: int
        Seconds to wait for server to start. Note that public key generation
        can take a while. The default value of None will use an internally
        computed value based on host type (and for Windows, the availability
        of pyWin32).

    tunnel: bool
        If True, report host IP address but listen for connections from a
        local SSH tunnel.

    resources: string
        Filename for resource configuration.

    log_prefix: string
        Name used to identify remote logging messages from server.
        Implies that the local process will be receiving the messages.

    Returns ``(server_proc, config_filename)``.

    Raises :class:`RuntimeError` if the server process exits with a non-zero
    code before its config file appears, or if the config file does not
    appear within `timeout` seconds.

    The ``<prefix>.key`` file is removed before returning or raising,
    whether startup succeeds or fails.
    """
    if timeout is None:
        if sys.platform == 'win32' and not HAVE_PYWIN32:  # pragma no cover
            timeout = 120
        else:
            timeout = 30

    server_key = prefix + '.key'
    server_cfg = prefix + '.cfg'
    server_out = prefix + '.out'
    # Remove stale output so the wait loop below only sees fresh files.
    for path in (server_cfg, server_out):
        if os.path.exists(path):
            os.remove(path)

    with open(server_key, 'w') as out:
        out.write('%s\n' % authkey)

    # The key file now exists; make sure it is removed no matter where
    # startup fails (argument setup, process creation, or the wait loop).
    try:
        factory_path = pkg_resources.resource_filename('openmdao.main',
                                                       'objserverfactory.py')
        args = ['python', factory_path,
                '--port', str(port), '--prefix', prefix]

        if address is not None:
            args.extend(['--address', address])

        if tunnel:
            args.append('--tunnel')

        if resources is not None:
            args.append('--resources')
            args.append(resources)

        if allowed_users is not None:
            write_authorized_keys(allowed_users, 'users.allow',
                                  logging.getLogger())
            args.extend(['--users', 'users.allow'])
        else:
            args.append('--allow-public')
            if port >= 0:
                if allowed_hosts is None:
                    allowed_hosts = [socket.gethostbyname(socket.gethostname())]
                    # Host resolves to a loopback address other than
                    # 127.0.0.1: allow the canonical loopback address too.
                    if allowed_hosts[0].startswith('127.') and \
                                    '127.0.0.1' not in allowed_hosts:
                        allowed_hosts.append('127.0.0.1')
                _write_private_list('hosts.allow', allowed_hosts)

        if allow_shell:
            args.append('--allow-shell')

        if allowed_types is not None:
            _write_private_list('types.allow', allowed_types)
            args.extend(['--types', 'types.allow'])

        if log_prefix is not None:
            log_host = socket.gethostname()
            log_port = logging_port(log_host, log_host)
            args.extend(['--log-host', log_host, '--log-port', str(log_port)])
            if log_prefix:  # Could be null (for default).
                args.extend(['--log-prefix', log_prefix])

        proc = ShellProc(args, stdout=server_out, stderr=STDOUT)

        # Wait for valid server_cfg file.
        retry = 0
        while (not os.path.exists(server_cfg)) or \
                (os.path.getsize(server_cfg) == 0):
            # NOTE(review): an early exit with code 0 is not treated as a
            # failure here; it would only be caught by the timeout below.
            return_code = proc.poll()
            if return_code:
                error_msg = proc.error_message(return_code)
                raise RuntimeError('Server startup failed %s' % error_msg)
            retry += 1
            # Polling every 0.1s, so 10 * timeout iterations ~ timeout secs.
            if retry < 10 * timeout:
                time.sleep(.1)
            # Hard to cause a startup timeout.
            else:  # pragma no cover
                proc.terminate(timeout)
                raise RuntimeError('Server startup timeout')
        return (proc, server_cfg)
    finally:
        if os.path.exists(server_key):
            os.remove(server_key)
Esempio n. 16
0
class ExternalCode(ComponentWithDerivatives):
    """
    Run an external code as a component.

    The command is run locally in a subprocess unless `resources` is
    non-empty, in which case a server is allocated, input files are sent
    to it, the command is run there, and output files are retrieved.
    See :meth:`execute`.
    """

    PIPE   = subprocess.PIPE
    STDOUT = subprocess.STDOUT

    # pylint: disable-msg=E1101
    command = Str('',
                  desc='The command to be executed.')
    env_vars = Dict({}, iotype='in',
                    desc='Environment variables required by the command.')
    resources = Dict({}, iotype='in',
                     desc='Resources required to run this component.')
    poll_delay = Float(0., low=0., units='s', iotype='in',
                       desc='Delay between polling for command completion.'
                            ' A value of zero will use an internally computed'
                            ' default.')
    timeout = Float(0., low=0., iotype='in', units='s',
                    desc='Maximum time to wait for command completion.'
                         ' A value of zero implies an infinite wait.')
    timed_out = Bool(False, iotype='out',
                     desc='True if the command timed-out.')
    return_code = Int(0, iotype='out',
                      desc='Return code from the command.')

    def __init__(self, *args, **kwargs):
        super(ExternalCode, self).__init__(*args, **kwargs)

        # Standard stream specifications handed to whatever runs the
        # command (a local ShellProc or the allocated server).
        self.stdin  = None
        self.stdout = None
        self.stderr = "error.out"

        self._process = None  # Local ShellProc while a command is running.
        self._server = None   # Allocated server while running remotely.

    # This gets used by remote server.
    def get_access_controller(self):  #pragma no cover
        """ Return :class:`AccessController` for this object. """
        return _AccessController()

    @rbac(('owner', 'user'))
    def set(self, path, value, index=None, src=None, force=False):
        """
        Don't allow setting of 'command' by remote client.

        Remote attempts to set 'command' or 'get_access_controller' are
        reported via ``raise_exception`` as a RuntimeError; everything else
        is delegated to the base class.
        """
        if path in ('command', 'get_access_controller') and remote_access():
            self.raise_exception('%r may not be set() remotely' % path,
                                 RuntimeError)
        return super(ExternalCode, self).set(path, value, index, src, force)

    def execute(self):
        """
        Runs the specified command.

        First removes existing output (but not in/out) files.
        Then if `resources` have been specified, an appropriate server
        is allocated and the command is run on that server.
        Otherwise the command is run locally.

        Failures are reported through ``raise_exception``: ValueError for
        an empty command, RunStopped if no return code was obtained and a
        stop was requested, RunInterrupted on timeout (`timed_out` is set),
        and RuntimeError for a non-zero return code.

        `return_code` is always updated on exit; it is set to -999999 if
        no return code was obtained.
        """
        self.return_code = -12345678
        self.timed_out = False

        # Remove stale pure-output files so old results can't be mistaken
        # for new ones.
        for metadata in self.external_files:
            if metadata.get('output', False) and \
               not metadata.get('input', False):
                for path in glob.glob(metadata.path):
                    if os.path.exists(path):
                        os.remove(path)

        if not self.command:
            self.raise_exception('Null command line', ValueError)

        return_code = None
        error_msg = ''
        try:
            if self.resources:
                return_code, error_msg = self._execute_remote()
            else:
                return_code, error_msg = self._execute_local()

            if return_code is None:
                if self._stop:
                    self.raise_exception('Run stopped', RunStopped)
                else:
                    self.timed_out = True
                    self.raise_exception('Timed out', RunInterrupted)
            elif return_code:
                # Prefer the contents of the stderr file when stderr was
                # routed to a named file, otherwise use the message that
                # came back with the return code.
                err_fragment = error_msg
                if isinstance(self.stderr, basestring):
                    try:
                        with open(self.stderr, 'r') as stderrfile:
                            err_fragment = "\nError Output:\n%s" \
                                           % stderrfile.read()
                    except IOError as exc:
                        # The file may not exist locally (e.g. the command
                        # never started). Don't let that mask the real
                        # failure; fall back to error_msg.
                        self._logger.debug("can't read %r: %s",
                                           self.stderr, exc)

                self.raise_exception('return_code = %d%s' \
                    % (return_code, err_fragment), RuntimeError)
        finally:
            self.return_code = -999999 if return_code is None else return_code

    def _execute_local(self):
        """
        Run command locally via :class:`ShellProc`.

        Returns ``(return_code, error_msg)`` as provided by the process
        ``wait()`` call. The process's files are always closed and
        `_process` reset, even if waiting fails.
        """
        self._logger.info("executing '%s'...", self.command)
        start_time = time.time()

        self._process = \
            ShellProc(self.command, self.stdin, self.stdout, self.stderr,
                      self.env_vars)
        self._logger.debug('PID = %d', self._process.pid)

        try:
            return_code, error_msg = \
                self._process.wait(self.poll_delay, self.timeout)
        finally:
            self._process.close_files()
            self._process = None

        # Only report elapsed time for runs of a minute or more.
        et = time.time() - start_time
        if et >= 60:  #pragma no cover
            self._logger.info('elapsed time: %.1f sec.', et)

        return (return_code, error_msg)

    def _execute_remote(self):
        """
        Allocate a server based on required resources, send inputs,
        run command, and retrieve results.

        Returns ``(return_code, error_msg)`` from the server's
        ``execute_command()``. The server is always released.
        """
        # Allocate server.
        self._server, server_info = RAM.allocate(self.resources)
        if self._server is None:
            self.raise_exception('Server allocation failed :-(', RuntimeError)

        return_code = -88888888
        error_msg = ''
        try:
            # Send inputs.
            patterns = []
            for metadata in self.external_files:
                if metadata.get('input', False):
                    patterns.append(metadata.path)
            if patterns:
                self._send_inputs(patterns)
            else:
                self._logger.debug("No input metadata paths")

            # Run command.
            self._logger.info("executing '%s'...", self.command)
            start_time = time.time()
            return_code, error_msg = \
                self._server.execute_command(self.command, self.stdin,
                                             self.stdout, self.stderr,
                                             self.env_vars, self.poll_delay,
                                             self.timeout)
            et = time.time() - start_time
            if et >= 60:  #pragma no cover
                self._logger.info('elapsed time: %f sec.', et)

            # Retrieve results.
            patterns = []
            for metadata in self.external_files:
                if metadata.get('output', False):
                    patterns.append(metadata.path)
            if patterns:
                self._retrieve_results(patterns)
            else:
                self._logger.debug("No output metadata paths")

        finally:
            RAM.release(self._server)
            self._server = None

        return (return_code, error_msg)

    def _send_inputs(self, patterns):
        """
        Sends input files matching `patterns`.

        Files are packed into a local ``inputs.zip``, transferred to the
        server and unpacked there. The local zip file is always removed.
        A mismatch between packed and unpacked file/byte counts is
        reported as a RuntimeError.
        """
        self._logger.info('sending inputs...')
        start_time = time.time()

        filename = 'inputs.zip'
        pfiles, pbytes = pack_zipfile(patterns, filename, self._logger)
        try:
            filexfer(None, filename, self._server, filename, 'b')
            ufiles, ubytes = self._server.unpack_zipfile(filename)
        finally:
            os.remove(filename)

        # Difficult to force file transfer error.
        if ufiles != pfiles or ubytes != pbytes:  #pragma no cover
            msg = 'Inputs xfer error: %d:%d vs. %d:%d' \
                  % (ufiles, ubytes, pfiles, pbytes)
            self.raise_exception(msg, RuntimeError)

        et = time.time() - start_time
        if et >= 60:  #pragma no cover
            self._logger.info('elapsed time: %f sec.', et)

    def _retrieve_results(self, patterns):
        """
        Retrieves result files matching `patterns`.

        Files are packed into ``outputs.zip`` on the server, transferred
        here and unpacked. The local zip file is removed afterwards.
        A mismatch between packed and unpacked file/byte counts is
        reported as a RuntimeError.
        """
        self._logger.info('retrieving results...')
        start_time = time.time()

        filename = 'outputs.zip'
        pfiles, pbytes = self._server.pack_zipfile(tuple(patterns), filename)
        try:
            filexfer(self._server, filename, None, filename, 'b')
            ufiles, ubytes = unpack_zipfile(filename, self._logger)
        finally:
            # The local file won't exist if the transfer failed early;
            # don't let cleanup mask the original exception.
            if os.path.exists(filename):
                os.remove(filename)

        # Difficult to force file transfer error.
        if ufiles != pfiles or ubytes != pbytes:  #pragma no cover
            msg = 'Results xfer error: %d:%d vs. %d:%d' \
                  % (ufiles, ubytes, pfiles, pbytes)
            self.raise_exception(msg, RuntimeError)

        et = time.time() - start_time
        if et >= 60:  #pragma no cover
            self._logger.info('elapsed time: %f sec.', et)

    def stop(self):
        """
        Stop the external code.

        Sets the stop flag checked by :meth:`execute` and terminates the
        local process, if one is currently running.
        """
        self._stop = True
        if self._process:
            self._process.terminate()

    def copy_inputs(self, inputs_dir, patterns):
        """
        Copy inputs from `inputs_dir` that match `patterns`.

        inputs_dir: string
            Directory to copy files from. Relative paths are evaluated from
            the component's execution directory.

        patterns: list or string
            One or more :mod:`glob` patterns to match against.

        This can be useful for resetting problem state.
        """
        self._logger.info('copying initial inputs from %s...', inputs_dir)
        with self.dir_context:
            if not os.path.exists(inputs_dir):
                self.raise_exception("inputs_dir '%s' does not exist" \
                                     % inputs_dir, RuntimeError)
            self._copy(inputs_dir, patterns)

    def copy_results(self, results_dir, patterns):
        """
        Copy files from `results_dir` that match `patterns`.

        results_dir: string
            Directory to copy files from. Relative paths are evaluated from
            the component's execution directory.

        patterns: list or string
            One or more :mod:`glob` patterns to match against.

        This can be useful for workflow debugging when the external
        code takes a long time to execute.
        """
        self._logger.info('copying precomputed results from %s...', results_dir)
        with self.dir_context:
            if not os.path.exists(results_dir):
                self.raise_exception("results_dir '%s' does not exist" \
                                     % results_dir, RuntimeError)
            self._copy(results_dir, patterns)

    def _copy(self, directory, patterns):
        """
        Copy files from `directory` that match `patterns`
        to the current directory and ensure they are writable.

        directory: string
            Directory to copy files from.

        patterns: list or string
            One or more :mod:`glob` patterns to match against.
        """
        # Accept a single pattern as well as a list of them.
        if isinstance(patterns, basestring):
            patterns = [patterns]

        for pattern in patterns:
            pattern = os.path.join(directory, pattern)
            for src_path in sorted(glob.glob(pattern)):
                # Files land in the current directory under their base name.
                dst_path = os.path.basename(src_path)
                self._logger.debug('    %s', src_path)
                shutil.copy(src_path, dst_path)
                # Ensure writable.
                mode = os.stat(dst_path).st_mode
                mode |= stat.S_IWUSR
                os.chmod(dst_path, mode)
    def execute_command(self, resource_desc):
        """
        Run command described by `resource_desc` in a subprocess if this
        server's `allow_shell` attribute is True.

        resource_desc: dict
            Contains job description.

        The current environment, along with any 'job_environment' specification,
        is in effect while running 'remote_command'.

        If 'input_path' is not specified, ``/dev/null`` or ``nul:`` is used.
        If 'output_path' is not specified, ``<remote_command>.stdout`` is used.
        If neither 'error_path' nor 'join_files' are specified,
        ``<remote_command>.stderr`` is used.

        If specified in the 'resource_limits' dictionary, 'wallclock_time' is
        used as a timeout.

        All other queuing resource keys are ignored.

        The ``HOME_DIRECTORY`` and ``WORKING_DIRECTORY`` placeholders are
        ignored.

        Returns ``(return_code, error_msg)`` from the process ``wait()``.
        Raises RuntimeError if shell access is not allowed by this server.
        """
        job_name = resource_desc.get('job_name', '')

        command = resource_desc['remote_command']
        self._check_path(command, 'execute_command')
        base = os.path.basename(command)
        command = [command]
        if 'args' in resource_desc:
            command.extend(resource_desc['args'])

        self._logger.debug('execute_command %s %r', job_name, command)
        if not self._allow_shell:
            self._logger.error('attempt to execute %r by %r', command,
                               get_credentials().user)
            raise RuntimeError('shell access is not allowed by this server')

        env_vars = resource_desc.get('job_environment')

        # Only the dictionary lookup is guarded, so a KeyError coming out
        # of _check_path() can't be mistaken for a missing key.
        try:
            stdin = resource_desc['input_path']
        except KeyError:
            stdin = DEV_NULL
        else:
            self._check_path(stdin, 'execute_command')

        try:
            stdout = resource_desc['output_path']
        except KeyError:
            stdout = base+'.stdout'
        else:
            self._check_path(stdout, 'execute_command')

        try:
            stderr = resource_desc['error_path']
        except KeyError:
            # No explicit error path: join with stdout only if requested.
            if resource_desc.get('join_files'):
                stderr = STDOUT
            else:
                stderr = base+'.stderr'
        else:
            self._check_path(stderr, 'execute_command')

        limits = resource_desc.get('resource_limits', {})
        timeout = limits.get('wallclock_time', 0)
        poll_delay = 1

        try:
            process = ShellProc(command, stdin, stdout, stderr, env_vars)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        self._logger.debug('    PID = %d', process.pid)
        try:
            return_code, error_msg = process.wait(poll_delay, timeout)
        finally:
            # Release the stdin/stdout/stderr files held by the process,
            # even if waiting fails.
            process.close_files()
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)
Esempio n. 18
0
    def execute_command(self, resource_desc):
        """
        Submit command based on `resource_desc`.

        resource_desc: dict
            Description of command and required resources.

        The '-V' `qsub` option is always used to export the current environment
        to the job. This environment is first updated with any 'job_environment'
        data. The '-W block=true' `qsub` option is used to wait for job
        completion.

        Other job resource keys are processed as follows:

        ========================= ===========================
        Resource Key              Translation
        ========================= ===========================
        submit_as_hold            -h
        ------------------------- ---------------------------
        rerunnable                -r y|n
        ------------------------- ---------------------------
        working_directory         Handled in generated script
        ------------------------- ---------------------------
        job_category              Ignored
        ------------------------- ---------------------------
        min_cpus                  -l select= `value` :ncpus=1
        ------------------------- ---------------------------
        max_cpus                  Ignored
        ------------------------- ---------------------------
        min_phys_memory           Ignored
        ------------------------- ---------------------------
        email                     -M `value`
        ------------------------- ---------------------------
        email_on_started          -m b
        ------------------------- ---------------------------
        email_on_terminated       -m e
        ------------------------- ---------------------------
        job_name                  -N `value`
        ------------------------- ---------------------------
        input_path                Handled in generated script
        ------------------------- ---------------------------
        output_path               Handled in generated script
        ------------------------- ---------------------------
        error_path                Handled in generated script
        ------------------------- ---------------------------
        join_files                Handled in generated script
        ------------------------- ---------------------------
        reservation_id            Ignored
        ------------------------- ---------------------------
        queue_name                -q `value`
        ------------------------- ---------------------------
        priority                  -p `value`
        ------------------------- ---------------------------
        start_time                -a `value`
        ------------------------- ---------------------------
        deadline_time             Ignored
        ------------------------- ---------------------------
        accounting_id             -W group_list= `value`
        ========================= ===========================

        Where `value` is the corresponding resource value.

        In order to support a working directory other than HOME or a
        PBS-generated scratch directory, a short script is written with
        PBS directives in the header. The script will change to the working
        directory and then run the command.

        If 'working_directory' is not specified, use current server directory.
        If 'input_path' is not specified, use ``/dev/null``.
        If 'output_path' is not specified, use ``<remote_command>.stdout``.
        If 'error_path' is not specified, use stdout.

        If 'native_specification' is specified, it is added to the `qsub`
        command just before the name of the generated script. If it contains
        a ``select`` clause, then that will prevent generation of a ``select``
        clause related to 'min_cpus'.

        Some resource limits are also handled:

        ==================== =========================
        Resource Key         Translation
        ==================== =========================
        core_file_size       Ignored
        -------------------- -------------------------
        data_seg_size        Ignored
        -------------------- -------------------------
        file_size            Ignored
        -------------------- -------------------------
        open_files           Ignored
        -------------------- -------------------------
        stack_size           Ignored
        -------------------- -------------------------
        virtual_memory       Ignored
        -------------------- -------------------------
        cpu_time             Ignored
        -------------------- -------------------------
        wallclock_time       -l walltime= `value`
        ==================== =========================

        Output from `qsub` itself is routed to ``qsub.out``.

        Returns ``(return_code, error_msg)`` from the `qsub` process
        ``wait()``.
        """
        self.home_dir = os.path.expanduser('~')
        self.work_dir = ''

        cmd = list(self._QSUB)
        cmd.extend(('-V', '-W', 'block=true', '-j', 'oe'))
        if sys.platform == 'win32':  # pragma no cover
            prefix = 'REM PBS'
            cmd.extend(('-C', '"%s"' % prefix))
            suffix = '-qsub.bat'
        else:
            prefix = '#PBS'
            cmd.extend(('-S', '/bin/sh'))
            suffix = '.qsub'
        env = None
        inp, out, err = None, None, None
        join_files = False

        # Set working directory now, for possible path fixing.
        try:
            value = resource_desc['working_directory']
        except KeyError:
            pass
        else:
            self.work_dir = self._fix_path(value)

        # Write script to be submitted rather than putting everything on
        # 'qsub' command line. We have to do this since otherwise there's
        # no way to set an execution directory or input path.
        if 'job_name' in resource_desc:
            base = self._jobname(resource_desc['job_name'])
        else:
            base = os.path.basename(resource_desc['remote_command'])
        script_name = '%s%s' % (base, suffix)

        native_specification = resource_desc.get('native_specification', [])

        with open(script_name, 'w') as script:
            if sys.platform == 'win32':  # pragma no cover
                script.write('@echo off\n')
            else:
                script.write('#!/bin/sh\n')

            # PBS (at least at NAS) requires 'group_list' be set.
            if 'accounting_id' in resource_desc:
                accounting_id = resource_desc['accounting_id']
            else:
                accounting_id = self.accounting_id
            script.write('%s -W group_list=%s\n'
                         % (prefix, accounting_id.strip()))

            # Process description in fixed, repeatable order.
            keys = ('submit_as_hold',
                    'rerunnable',
                    'job_environment',
                    'min_cpus',
                    'email',
                    'email_on_started',
                    'email_on_terminated',
                    'job_name',
                    'input_path',
                    'output_path',
                    'error_path',
                    'join_files',
                    'queue_name',
                    'priority',
                    'start_time')

            email_events = ''
            for key in keys:
                try:
                    value = resource_desc[key]
                except KeyError:
                    continue

                if key == 'submit_as_hold':
                    if value:
                        script.write('%s -h\n' % prefix)
                elif key == 'rerunnable':
                    script.write('%s -r %s\n' % (prefix, 'y' if value else 'n'))
                elif key == 'job_environment':
                    env = value
                elif key == 'min_cpus':
                    # Only write select clause if not in 'native_specification'.
                    if not any('select' in arg for arg in native_specification):
                        script.write('%s -l select=%d:ncpus=1\n' % (prefix, value))
                elif key == 'email':
                    script.write('%s -M %s\n' % (prefix, ','.join(value)))
                elif key == 'email_on_started':
                    email_events += 'b'
                elif key == 'email_on_terminated':
                    email_events += 'e'
                elif key == 'job_name':
                    script.write('%s -N %s\n' % (prefix, self._jobname(value)))
                elif key == 'input_path':
                    inp = value
                elif key == 'output_path':
                    out = value
                elif key == 'error_path':
                    err = value
                elif key == 'join_files':
                    join_files = value
                elif key == 'queue_name':
                    script.write('%s -q %s\n' % (prefix, value))
                elif key == 'priority':
                    script.write('%s -p %d\n' % (prefix, value))
                elif key == 'start_time':
                    script.write('%s -a %s\n'
                                 % (prefix, value.strftime('%Y%m%d%H%M.%S')))

            # Email events are accumulated so a single '-m' is written.
            if email_events:
                script.write('%s -m %s\n' % (prefix, email_events))

            # Set resource limits.
            if 'resource_limits' in resource_desc:
                limits = resource_desc['resource_limits']
                if 'wallclock_time' in limits:
                    wall_time = limits['wallclock_time']
                    script.write('%s -l walltime=%s\n'
                                 % (prefix, self._timelimit(wall_time)))

            # Have script move to work directory relative to
            # home directory on execution host.
            home = os.path.realpath(os.path.expanduser('~'))
            work = os.path.realpath(self.work_dir or os.getcwd())
            # Require a path-component boundary so that a sibling such as
            # '/home/user2' is not treated as being under '/home/user'.
            if work == home or work.startswith(home + os.sep):
                work = work[len(home)+1:]
                if sys.platform == 'win32':  # pragma no cover
                    script.write('cd %HOMEDRIVE%%HOMEPATH%\n')
                else:
                    script.write('cd $HOME\n')
            else:
                # This can potentially cause problems...
                self._logger.warning('work %r not a descendant of home %r',
                                     work, home)
            script.write('cd %s\n' % work)

            script.write(self._fix_path(resource_desc['remote_command']))

            if 'args' in resource_desc:
                for arg in resource_desc['args']:
                    script.write(' %s' % self._fix_path(arg))

            script.write(' <%s' % (inp or DEV_NULL))
            script.write(' >%s' % (out or '%s.stdout' % base))
            if join_files or err is None:
                script.write(' 2>&1')
            else:
                script.write(' 2>%s' % err)
            script.write('\n')

        # Make the script executable (and private to the owner).
        if sys.platform != 'win32':
            os.chmod(script_name, 0o700)

        # Add 'escape' clause.
        cmd.extend(native_specification)

        cmd.append(os.path.join('.', script_name))
        self._logger.info('%r', ' '.join(cmd))
        try:
            process = ShellProc(cmd, DEV_NULL, 'qsub.out', STDOUT, env)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        self._logger.debug('    PID = %d', process.pid)
        try:
            return_code, error_msg = process.wait(1)
        finally:
            # Release the files held by the process, even if waiting fails.
            process.close_files()
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)
Esempio n. 19
0
    def execute_command(self, resource_desc):
        """
        Submit command based on `resource_desc`.

        resource_desc: dict
            Description of command and required resources.

        The '-V' `qsub` option is always used to export the current environment
        to the job. This environment is first updated with any 'job_environment'
        data. The '-sync yes' `qsub` option is used to wait for job completion.

        Other job resource keys are processed as follows:

        ========================= ====================
        Resource Key              Translation
        ========================= ====================
        job_name                  -N `value`
        ------------------------- --------------------
        working_directory         -wd `value`
        ------------------------- --------------------
        parallel_environment      -pe `value` `n_cpus`
        ------------------------- --------------------
        input_path                -i `value`
        ------------------------- --------------------
        output_path               -o `value`
        ------------------------- --------------------
        error_path                -e `value`
        ------------------------- --------------------
        join_files                -j yes|no
        ------------------------- --------------------
        email                     -M `value`
        ------------------------- --------------------
        block_email               -m n
        ------------------------- --------------------
        email_events              -m `value`
        ------------------------- --------------------
        start_time                -a `value`
        ------------------------- --------------------
        deadline_time             Not supported
        ------------------------- --------------------
        hard_wallclock_time_limit -l h_rt= `value`
        ------------------------- --------------------
        soft_wallclock_time_limit -l s_rt= `value`
        ------------------------- --------------------
        hard_run_duration_limit   -l h_cpu= `value`
        ------------------------- --------------------
        soft_run_duration_limit   -l s_cpu= `value`
        ------------------------- --------------------
        job_category              Not supported
        ========================= ====================

        Where `value` is the corresponding resource value and
        `n_cpus` is the value of the 'n_cpus' resource, or 1.

        If 'working_directory' is not specified, add ``-cwd``.
        If 'input_path' is not specified, add ``-i /dev/null``.
        If 'output_path' is not specified, add ``-o <remote_command>.stdout``.
        If 'error_path' is not specified, add ``-j yes``.

        If 'native_specification' is specified, it is added to the `qsub`
        command just before 'remote_command' and 'args'.

        Output from `qsub` itself is routed to ``qsub.out``.

        Returns ``(return_code, error_msg)`` from the `qsub` process
        ``wait()``.
        """
        self.home_dir = os.environ['HOME']
        self.work_dir = ''

        cmd = [self._QSUB, '-V', '-sync', 'yes']
        env = None
        inp, out, err = None, None, None

        # Set working directory now, for possible path fixing.
        try:
            value = resource_desc['working_directory']
        except KeyError:
            pass
        else:
            self.work_dir = self._fix_path(value)
            cmd.extend(('-wd', value))

        # Process description in fixed, repeatable order.
        keys = ('job_name',
                'job_environment',
                'parallel_environment',
                'input_path',
                'output_path',
                'error_path',
                'join_files',
                'email',
                'block_email',
                'email_events',
                'start_time',
                'hard_wallclock_time_limit',
                'soft_wallclock_time_limit',
                'hard_run_duration_limit',
                'soft_run_duration_limit')

        # Time limit keys and the '-l' resource name each one maps to.
        time_limits = {'hard_wallclock_time_limit': 'h_rt',
                       'soft_wallclock_time_limit': 's_rt',
                       'hard_run_duration_limit': 'h_cpu',
                       'soft_run_duration_limit': 's_cpu'}

        for key in keys:
            try:
                value = resource_desc[key]
            except KeyError:
                continue

            if key == 'job_name':
                cmd.extend(('-N', value))
            elif key == 'job_environment':
                env = value
            elif key == 'parallel_environment':
                n_cpus = resource_desc.get('n_cpus', 1)
                cmd.extend(('-pe', value, str(n_cpus)))
            elif key == 'input_path':
                cmd.extend(('-i', self._fix_path(value)))
                inp = value
            elif key == 'output_path':
                cmd.extend(('-o', self._fix_path(value)))
                out = value
            elif key == 'error_path':
                cmd.extend(('-e', self._fix_path(value)))
                err = value
            elif key == 'join_files':
                cmd.extend(('-j', 'yes' if value else 'no'))
                if value:
                    # Mark stderr as handled so the default '-j yes'
                    # below isn't added a second time.
                    err = 'yes'
            elif key == 'email':
                cmd.extend(('-M', ','.join(value)))
            elif key == 'block_email':
                if value:
                    cmd.extend(('-m', 'n'))
            elif key == 'email_events':
                cmd.extend(('-m', value))
            elif key == 'start_time':
                cmd.extend(('-a', value))  # May need to translate
            elif key in time_limits:
                cmd.extend(('-l', '%s=%s' % (time_limits[key],
                                             self._make_time(value))))

        # Defaults for anything not specified above.
        if not self.work_dir:
            cmd.append('-cwd')

        if inp is None:
            cmd.extend(('-i', '/dev/null'))
        if out is None:
            cmd.extend(('-o', '%s.stdout'
                        % os.path.basename(resource_desc['remote_command'])))
        if err is None:
            cmd.extend(('-j', 'yes'))

        if 'native_specification' in resource_desc:
            cmd.extend(resource_desc['native_specification'])

        cmd.append(self._fix_path(resource_desc['remote_command']))

        if 'args' in resource_desc:
            for arg in resource_desc['args']:
                cmd.append(self._fix_path(arg))

        self._logger.info('%r', ' '.join(cmd))
        try:
            process = ShellProc(cmd, '/dev/null', 'qsub.out', STDOUT, env)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        self._logger.debug('    PID = %d', process.pid)
        try:
            return_code, error_msg = process.wait(1)
        finally:
            # Release the files held by the process, even if waiting fails.
            process.close_files()
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)
class ExternalCode(ComponentWithDerivatives):
    """
    Run an external code as a component. The component can be configured to
    run the code on a remote server, see :meth:`execute`.

    The instance attributes `stdin`, `stdout`, and `stderr` are passed to
    the process which runs the command (or, for remote execution, are
    placed in the resource description as 'input_path', 'output_path', and
    'error_path'). `stderr` defaults to ``"error.out"``; set it to
    :attr:`STDOUT` to join it with `stdout`.
    """

    PIPE = subprocess.PIPE
    STDOUT = subprocess.STDOUT

    # pylint: disable-msg=E1101
    command = List(Str, desc='The command to be executed.')
    env_vars = Dict({},
                    iotype='in',
                    desc='Environment variables required by the command.')
    resources = Dict({},
                     iotype='in',
                     desc='Resources required to run this component.')
    poll_delay = Float(0.,
                       low=0.,
                       units='s',
                       iotype='in',
                       desc='Delay between polling for command completion.'
                       ' A value of zero will use an internally computed'
                       ' default.')
    timeout = Float(0.,
                    low=0.,
                    iotype='in',
                    units='s',
                    desc='Maximum time to wait for command completion.'
                    ' A value of zero implies an infinite wait.')
    timed_out = Bool(False,
                     iotype='out',
                     desc='True if the command timed-out.')
    return_code = Int(0, iotype='out', desc='Return code from the command.')

    def __init__(self, *args, **kwargs):
        """ Initialize I/O redirection defaults and execution state. """
        super(ExternalCode, self).__init__(*args, **kwargs)

        self.stdin = None
        self.stdout = None
        self.stderr = "error.out"

        # Local process while running locally, allocated server while
        # running remotely; both are None when idle.
        self._process = None
        self._server = None

    # This gets used by remote server.
    def get_access_controller(self):  #pragma no cover
        """ Return :class:`AccessController` for this object. """
        return _AccessController()

    @rbac(('owner', 'user'))
    def set(self, path, value, index=None, src=None, force=False):
        """
        Don't allow setting of 'command' (or 'get_access_controller')
        by a remote client. All other paths are handled by the base class.
        """
        if path in ('command', 'get_access_controller') and remote_access():
            self.raise_exception('%r may not be set() remotely' % path,
                                 RuntimeError)
        return super(ExternalCode, self).set(path, value, index, src, force)

    def execute(self):
        """
        Runs the specified command.

        First removes existing output (but not in/out) files.
        Then if `resources` have been specified, an appropriate server
        is allocated and the command is run on that server.
        Otherwise the command is run locally.

        When running remotely, the following resources are set:

        ======================= =====================================
        Key                     Value
        ======================= =====================================
        job_name                self.get_pathname()
        ----------------------- -------------------------------------
        remote_command          self.command (first item)
        ----------------------- -------------------------------------
        args                    self.command (2nd through last items)
        ----------------------- -------------------------------------
        job_environment         self.env_vars
        ----------------------- -------------------------------------
        input_path              self.stdin
        ----------------------- -------------------------------------
        output_path             self.stdout
        ----------------------- -------------------------------------
        error_path              self.stderr (if != STDOUT)
        ----------------------- -------------------------------------
        join_files              If self.stderr == STDOUT
        ----------------------- -------------------------------------
        hard_run_duration_limit self.timeout (if non-zero)
        ======================= =====================================

        If no return code is obtained, :class:`RunStopped` is raised when
        the run was stopped, otherwise `timed_out` is set True and
        :class:`RunInterrupted` is raised. A non-zero return code raises
        :class:`RuntimeError`; the message includes the contents of the
        `stderr` file when `stderr` is a filename and the file is readable.
        In all cases `return_code` is updated on exit (-999999 if no return
        code was obtained).

        .. note::

            Input files to be sent to the remote server are defined by
            :class:`FileMetadata` entries in the `external_files` list
            with `input` True.  Similarly, output files to be retrieved
            from the remote server are defined by entries with `output`
            True.

        .. warning::

            Any file **not** labelled with `binary` True will undergo
            newline translation if the local and remote machines have
            different newline representations. Newline translation will
            corrupt a file which is binary but hasn't been labelled as
            such.

        """
        # Sentinel value, replaced in the 'finally' clause below.
        self.return_code = -12345678
        self.timed_out = False

        # Remove stale output-only files so old results can't be mistaken
        # for results of this run.
        for metadata in self.external_files:
            if metadata.get('output', False) and \
               not metadata.get('input', False):
                for path in glob.glob(metadata.path):
                    if os.path.exists(path):
                        os.remove(path)

        if not self.command:
            self.raise_exception('Null command line', ValueError)

        return_code = None
        error_msg = ''
        try:
            if self.resources:
                return_code, error_msg = self._execute_remote()
            else:
                return_code, error_msg = self._execute_local()

            if return_code is None:
                if self._stop:
                    self.raise_exception('Run stopped', RunStopped)
                else:
                    self.timed_out = True
                    self.raise_exception('Timed out', RunInterrupted)

            elif return_code:
                if isinstance(self.stderr, str):
                    # Report the command's error output, but don't let a
                    # missing or unreadable stderr file (e.g. one which was
                    # never retrieved from a remote server) hide the real
                    # failure behind an IOError.
                    try:
                        with open(self.stderr, 'r') as stderrfile:
                            error_desc = stderrfile.read()
                    except (IOError, OSError) as exc:
                        err_fragment = "\n[stderr %r unavailable: %s]" \
                                       % (self.stderr, exc)
                    else:
                        err_fragment = "\nError Output:\n%s" % error_desc
                else:
                    err_fragment = error_msg

                self.raise_exception('return_code = %d%s' \
                    % (return_code, err_fragment), RuntimeError)
        finally:
            self.return_code = -999999 if return_code is None else return_code

    def _execute_local(self):
        """
        Run command in a local subprocess.

        Returns ``(return_code, error_msg)`` as reported by the process's
        :meth:`wait`.
        """
        self._logger.info('executing %s...', self.command)
        start_time = time.time()

        self._process = \
            ShellProc(self.command, self.stdin, self.stdout, self.stderr,
                      self.env_vars)
        self._logger.debug('PID = %d', self._process.pid)

        try:
            return_code, error_msg = \
                self._process.wait(self.poll_delay, self.timeout)
        finally:
            # Always release the process's files and forget the process so
            # stop() won't try to terminate a finished process.
            self._process.close_files()
            self._process = None

        et = time.time() - start_time
        if et >= 60:  #pragma no cover
            self._logger.info('elapsed time: %.1f sec.', et)

        return (return_code, error_msg)

    def _execute_remote(self):
        """
        Allocate a server based on required resources, send inputs,
        run command, and retrieve results.

        Returns ``(return_code, error_msg)`` from the server's
        :meth:`execute_command`. The server is always released, even if
        an exception occurs.
        """
        # Allocate server.
        self._server, server_info = RAM.allocate(self.resources)
        if self._server is None:
            self.raise_exception('Server allocation failed :-(', RuntimeError)

        # Placeholder, replaced by the result of execute_command().
        return_code = -88888888
        error_msg = ''
        try:
            # Create resource description for command.
            rdesc = self.resources.copy()
            rdesc['job_name'] = self.get_pathname()
            rdesc['remote_command'] = self.command[0]
            if len(self.command) > 1:
                rdesc['args'] = self.command[1:]
            if self.env_vars:
                rdesc['job_environment'] = self.env_vars
            if self.stdin:
                rdesc['input_path'] = self.stdin
            if self.stdout:
                rdesc['output_path'] = self.stdout
            if self.stderr:
                if self.stderr == self.STDOUT:
                    rdesc['join_files'] = True
                else:
                    rdesc['error_path'] = self.stderr
            if self.timeout:
                rdesc['hard_run_duration_limit'] = self.timeout

            # Send inputs.
            patterns = []
            textfiles = []
            for metadata in self.external_files:
                if metadata.get('input', False):
                    patterns.append(metadata.path)
                    if not metadata.binary:
                        textfiles.append(metadata.path)
            if patterns:
                self._send_inputs(patterns, textfiles)
            else:
                self._logger.debug('No input metadata paths')

            # Run command.
            self._logger.info('executing %s...', self.command)
            start_time = time.time()
            return_code, error_msg = \
                self._server.execute_command(rdesc)
            et = time.time() - start_time
            if et >= 60:  #pragma no cover
                self._logger.info('elapsed time: %.1f sec.', et)

            # Retrieve results.
            patterns = []
            textfiles = []
            for metadata in self.external_files:
                if metadata.get('output', False):
                    patterns.append(metadata.path)
                    if not metadata.binary:
                        textfiles.append(metadata.path)
            if patterns:
                self._retrieve_results(patterns, textfiles)
            else:
                self._logger.debug('No output metadata paths')

        finally:
            RAM.release(self._server)
            self._server = None

        return (return_code, error_msg)

    def _send_inputs(self, patterns, textfiles):
        """
        Sends input files matching `patterns`.

        The files are packed into a local 'inputs.zip', transferred to the
        server, and unpacked there. `textfiles` is passed to the server's
        :meth:`unpack_zipfile`. The local zip file is always removed.
        """
        self._logger.info('sending inputs...')
        start_time = time.time()

        filename = 'inputs.zip'
        pfiles, pbytes = pack_zipfile(patterns, filename, self._logger)
        try:
            filexfer(None, filename, self._server, filename, 'b')
            ufiles, ubytes = self._server.unpack_zipfile(filename,
                                                         textfiles=textfiles)
        finally:
            os.remove(filename)

        # Difficult to force file transfer error.
        if ufiles != pfiles or ubytes != pbytes:  #pragma no cover
            msg = 'Inputs xfer error: %d:%d vs. %d:%d' \
                  % (ufiles, ubytes, pfiles, pbytes)
            self.raise_exception(msg, RuntimeError)

        et = time.time() - start_time
        if et >= 60:  #pragma no cover
            self._logger.info('elapsed time: %f sec.', et)

    def _retrieve_results(self, patterns, textfiles):
        """
        Retrieves result files matching `patterns`.

        The files are packed into 'outputs.zip' on the server, transferred
        to the local host, and unpacked here. `textfiles` is passed to
        :func:`unpack_zipfile`. The local zip file is removed after
        unpacking.
        """
        self._logger.info('retrieving results...')
        start_time = time.time()

        filename = 'outputs.zip'
        pfiles, pbytes = self._server.pack_zipfile(patterns, filename)
        filexfer(self._server, filename, None, filename, 'b')

        # Valid, but empty, file causes unpack_zipfile() problems.
        try:
            if os.path.getsize(filename) > 0:
                ufiles, ubytes = unpack_zipfile(filename,
                                                logger=self._logger,
                                                textfiles=textfiles)
            else:
                ufiles, ubytes = 0, 0
        finally:
            os.remove(filename)

        # Difficult to force file transfer error.
        if ufiles != pfiles or ubytes != pbytes:  #pragma no cover
            msg = 'Results xfer error: %d:%d vs. %d:%d' \
                  % (ufiles, ubytes, pfiles, pbytes)
            self.raise_exception(msg, RuntimeError)

        et = time.time() - start_time
        if et >= 60:  #pragma no cover
            self._logger.info('elapsed time: %f sec.', et)

    def stop(self):
        """ Stop the external code (terminates a running local process). """
        self._stop = True
        if self._process:
            self._process.terminate()

    def copy_inputs(self, inputs_dir, patterns):
        """
        Copy inputs from `inputs_dir` that match `patterns`.

        inputs_dir: string
            Directory to copy files from. Relative paths are evaluated from
            the component's execution directory.

        patterns: list or string
            One or more :mod:`glob` patterns to match against.

        This can be useful for resetting problem state.
        """
        self._logger.info('copying initial inputs from %s...', inputs_dir)
        with self.dir_context:
            if not os.path.exists(inputs_dir):
                self.raise_exception("inputs_dir '%s' does not exist" \
                                     % inputs_dir, RuntimeError)
            self._copy(inputs_dir, patterns)

    def copy_results(self, results_dir, patterns):
        """
        Copy files from `results_dir` that match `patterns`.

        results_dir: string
            Directory to copy files from. Relative paths are evaluated from
            the component's execution directory.

        patterns: list or string
            One or more :mod:`glob` patterns to match against.

        This can be useful for workflow debugging when the external
        code takes a long time to execute.
        """
        self._logger.info('copying precomputed results from %s...',
                          results_dir)
        with self.dir_context:
            if not os.path.exists(results_dir):
                self.raise_exception("results_dir '%s' does not exist" \
                                     % results_dir, RuntimeError)
            self._copy(results_dir, patterns)

    def _copy(self, directory, patterns):
        """
        Copy files from `directory` that match `patterns`
        to the current directory and ensure they are writable.

        directory: string
            Directory to copy files from.

        patterns: list or string
            One or more :mod:`glob` patterns to match against.
        """
        # Accept a single pattern as well as a list of patterns.
        if isinstance(patterns, basestring):
            patterns = [patterns]

        for pattern in patterns:
            pattern = os.path.join(directory, pattern)
            for src_path in sorted(glob.glob(pattern)):
                dst_path = os.path.basename(src_path)
                self._logger.debug('    %s', src_path)
                shutil.copy(src_path, dst_path)
                # Ensure writable.
                mode = os.stat(dst_path).st_mode
                mode |= stat.S_IWUSR
                os.chmod(dst_path, mode)
def start_server(authkey='PublicKey', address=None, port=0, prefix='server',
                 allowed_hosts=None, allowed_users=None, allow_shell=False,
                 allowed_types=None, timeout=None, tunnel=False,
                 resources=None, log_prefix=None):
    """
    Start an :class:`ObjServerFactory` service in a separate process
    in the current directory.

    authkey: string
        Authorization key, must be matched by clients.

    address: string
        IPv4 address, hostname, or pipe name.
        Default is the host's default IPv4 address.

    port: int
        Server port (default of 0 implies next available port).
        Note that ports below 1024 typically require special privileges.
        If port is negative, then a local pipe is used for communication.

    prefix: string
        Prefix for server config file and stdout/stderr file.

    allowed_hosts: list(string)
        Host address patterns to check against. Required if `port` >= 0.
        Ignored if `allowed_users` is specified.

    allowed_users: dict
        Dictionary of users and corresponding public keys allowed access.
        If None, *any* user may access. If empty, no user may access.
        The host portions of user strings are used for address patterns.

    allow_shell: bool
        If True, :meth:`execute_command` and :meth:`load_model` are allowed.
        Use with caution!

    allowed_types: list(string)
        Names of types which may be created. If None, then allow types listed
        by :meth:`get_available_types`. If empty, no types are allowed.

    timeout: int
        Seconds to wait for server to start. Note that public key generation
        can take a while. The default value of None will use an internally
        computed value based on host type (and for Windows, the availability
        of pyWin32).

    tunnel: bool
        If True, report host IP address but listen for connections from a
        local SSH tunnel.

    resources: string
        Filename for resource configuration.

    log_prefix: string
        Name used to identify remote logging messages from server.
        Implies that the local process will be receiving the messages.

    Returns ``(server_proc, config_filename)``.

    Raises :class:`RuntimeError` if the server process exits before writing
    its configuration file, or if the configuration file does not appear
    within `timeout` seconds.

    Depending on the arguments, the files ``<prefix>.key`` (removed before
    returning), ``users.allow``, ``hosts.allow``, and ``types.allow`` are
    written to the current directory.
    """
    if timeout is None:
        if sys.platform == 'win32' and not HAVE_PYWIN32:  #pragma no cover
            timeout = 120
        else:
            timeout = 30

    server_key = prefix+'.key'
    server_cfg = prefix+'.cfg'
    server_out = prefix+'.out'
    # Remove leftovers from a previous run; a stale config file would be
    # mistaken for a successful startup.
    for path in (server_cfg, server_out):
        if os.path.exists(path):
            os.remove(path)

    with open(server_key, 'w') as out:
        out.write('%s\n' % authkey)

    factory_path = pkg_resources.resource_filename('openmdao.main',
                                                   'objserverfactory.py')
    args = ['python', factory_path, '--port', str(port), '--prefix', prefix]

    if address is not None:
        args.extend(['--address', address])

    if tunnel:
        args.append('--tunnel')

    if resources is not None:
        args.append('--resources')
        args.append(resources)

    if allowed_users is not None:
        write_authorized_keys(allowed_users, 'users.allow', logging.getLogger())
        args.extend(['--users', 'users.allow'])
    else:
        args.append('--allow-public')
        if port >= 0:
            if allowed_hosts is None:
                allowed_hosts = [socket.gethostbyname(socket.gethostname())]
                if allowed_hosts[0].startswith('127.') and \
                   '127.0.0.1' not in allowed_hosts:
                    allowed_hosts.append('127.0.0.1')
            with open('hosts.allow', 'w') as out:
                for pattern in allowed_hosts:
                    out.write('%s\n' % pattern)
            if sys.platform != 'win32' or HAVE_PYWIN32:
                make_private('hosts.allow')
            else:  #pragma no cover
                logging.warning("Can't make hosts.allow private")

    if allow_shell:
        args.append('--allow-shell')

    if allowed_types is not None:
        with open('types.allow', 'w') as out:
            for typname in allowed_types:
                out.write('%s\n' % typname)
        if sys.platform != 'win32' or HAVE_PYWIN32:
            make_private('types.allow')
        else:  #pragma no cover
            logging.warning("Can't make types.allow private")
        args.extend(['--types', 'types.allow'])

    if log_prefix is not None:
        log_host = socket.gethostname()
        log_port = logging_port(log_host, log_host)
        args.extend(['--log-host', log_host, '--log-port', str(log_port)])
        if log_prefix:  # Could be null (for default).
            args.extend(['--log-prefix', log_prefix])

    proc = ShellProc(args, stdout=server_out, stderr=STDOUT)

    try:
        # Wait for valid server_cfg file.
        retry = 0
        while (not os.path.exists(server_cfg)) or \
              (os.path.getsize(server_cfg) == 0):
            return_code = proc.poll()
            # poll() returns None while the process is still running. Any
            # other value (including zero) means the server has exited
            # without writing its configuration, so don't wait for timeout.
            if return_code is not None:
                if return_code:
                    error_msg = proc.error_message(return_code)
                else:
                    error_msg = ': server exited with return code 0' \
                                ' before writing %s' % server_cfg
                raise RuntimeError('Server startup failed %s' % error_msg)
            retry += 1
            if retry < 10*timeout:
                time.sleep(.1)
            # Hard to cause a startup timeout.
            else:  #pragma no cover
                proc.terminate(timeout)
                raise RuntimeError('Server startup timeout')
        return (proc, server_cfg)
    finally:
        # The key file is only needed while the server starts up.
        if os.path.exists(server_key):
            os.remove(server_key)
    def execute_command(self, resource_desc):
        """
        Run command described by `resource_desc` in a subprocess if this
        server's `allow_shell` attribute is True.

        resource_desc: dict
            Contains job description.

        The current environment, along with any 'job_environment' specification,
        is in effect while running 'remote_command'.

        If 'input_path' is not specified, ``/dev/null`` or ``nul:`` is used.
        If 'output_path' is not specified, ``<remote_command>.stdout`` is used.
        If neither 'error_path' nor 'join_files' are specified,
        ``<remote_command>.stderr`` is used.

        If specified in the 'resource_limits' dictionary, 'wallclock_time' is
        used as a timeout.

        All other queuing resource keys are ignored.

        The ``HOME_DIRECTORY`` and ``WORKING_DIRECTORY`` placeholders are
        ignored.

        Returns ``(return_code, error_msg)`` from waiting on the subprocess.

        Raises :class:`RuntimeError` if shell access is not allowed by this
        server. Any exception raised while creating the subprocess is logged
        and re-raised.
        """
        job_name = resource_desc.get('job_name', '')

        command = resource_desc['remote_command']
        self._check_path(command, 'execute_command')
        base = os.path.basename(command)
        command = [command]
        if 'args' in resource_desc:
            command.extend(resource_desc['args'])

        self._logger.debug('execute_command %s %r', job_name, command)
        if not self._allow_shell:
            self._logger.error('attempt to execute %r by %r', command,
                               get_credentials().user)
            raise RuntimeError('shell access is not allowed by this server')

        env_vars = resource_desc.get('job_environment')

        # Explicit key checks (rather than wrapping the path check in a
        # 'try/except KeyError') so an error raised by _check_path() can
        # never be mistaken for a missing key.
        if 'input_path' in resource_desc:
            stdin = resource_desc['input_path']
            self._check_path(stdin, 'execute_command')
        else:
            stdin = DEV_NULL

        if 'output_path' in resource_desc:
            stdout = resource_desc['output_path']
            self._check_path(stdout, 'execute_command')
        else:
            stdout = base + '.stdout'

        if 'error_path' in resource_desc:
            stderr = resource_desc['error_path']
            self._check_path(stderr, 'execute_command')
        elif resource_desc.get('join_files'):
            stderr = STDOUT
        else:
            stderr = base + '.stderr'

        limits = resource_desc.get('resource_limits', {})
        timeout = limits.get('wallclock_time', 0)
        poll_delay = 1

        try:
            process = ShellProc(command, stdin, stdout, stderr, env_vars)
        except Exception as exc:
            self._logger.error('exception creating process: %s', exc)
            raise

        self._logger.debug('    PID = %d', process.pid)
        try:
            return_code, error_msg = process.wait(poll_delay, timeout)
        finally:
            # Always close the files associated with the process, even if
            # wait() raises, so repeated commands don't leak open files.
            process.close_files()
        self._logger.debug('    returning %s', (return_code, error_msg))
        return (return_code, error_msg)