def setup_tunnel(address, port):
    """
    Set up tunnel to `address` and `port` assuming:
    - The remote login name matches the local login name.
    - `port` is available on the local host.
    - 'plink' is available on Windows, 'ssh' on other platforms.
    - No user interaction is required to connect via 'plink'/'ssh'.

    address: string
        IPv4 address to tunnel to.

    port: int
        Port at `address` to tunnel to.

    Returns ``(local_address, local_port)``.
    """
    logname = 'tunnel-%s-%d.log' % (address, port)
    logname = os.path.join(os.getcwd(), logname)
    stdout = open(logname, 'w')

    user = getpass.getuser()
    if sys.platform == 'win32':  # pragma no cover
        stdin = open('nul:', 'r')
        args = ['plink', '-ssh', '-l', user,
                '-L', '%d:localhost:%d' % (port, port), address]
    else:
        stdin = open('/dev/null', 'r')
        args = ['ssh', '-l', user,
                '-L', '%d:localhost:%d' % (port, port), address]

    tunnel_proc = ShellProc(args, stdin=stdin, stdout=stdout, stderr=STDOUT)

    sock = socket.socket(socket.AF_INET)
    address = ('127.0.0.1', port)  # From here on, `address` is the local endpoint.
    for retry in range(20):
        time.sleep(.5)
        exitcode = tunnel_proc.poll()
        if exitcode is not None:
            msg = 'ssh tunnel process exited with exitcode %d,' \
                  ' output in %s' % (exitcode, logname)
            logging.error(msg)
            raise RuntimeError(msg)
        try:
            sock.connect(address)
        except socket.error as exc:
            if exc.args[0] != errno.ECONNREFUSED and \
               exc.args[0] != errno.ENOENT:
                raise
        else:
            atexit.register(_cleanup_tunnel, tunnel_proc, logname)
            sock.close()
            return address

    _cleanup_tunnel(tunnel_proc, logname)
    raise RuntimeError('Timeout trying to connect through tunnel to %s'
                       % address)
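# A hedged usage sketch for setup_tunnel() above; the target address is
# hypothetical, and a reachable SSH server with passwordless login is assumed
# (per the docstring's preconditions).
#
# local_address, local_port = setup_tunnel('192.168.1.42', 7777)
# # Connections to ('127.0.0.1', 7777) now forward to 192.168.1.42:7777;
# # _cleanup_tunnel() has been registered via atexit to tear the tunnel down.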
def _get_hosts(self):
    """ Return list of hostnames sorted by load. """
    # Get host load information.
    try:
        proc = ShellProc(self._QHOST, stdout=PIPE)
    except Exception as exc:
        self._logger.error('%r failed: %s' % (self._QHOST, exc))
        return []
    lines = proc.stdout.readlines()

    # Reduce to hosts we're interested in and sort by CPU-adjusted load.
    loads = []
    for line in lines:
        if line.startswith(('HOSTNAME', '-')):
            continue
        hostname, arch, ncpu, load, \
            memtot, memuse, swapto, swapus = line.split()
        if self.pattern:
            if not fnmatch.fnmatchcase(hostname, self.pattern):
                continue
        try:
            load = float(load)
            ncpu = int(ncpu)
        except ValueError:
            # Unavailable hosts report '-' for load; skip them.
            continue
        loads.append((hostname, load / ncpu, ncpu))
    loads = sorted(loads, key=lambda item: item[1])

    # Return list of hostnames, one entry per CPU, least-loaded first.
    hosts = []
    for hostname, load, ncpu in loads:
        for i in range(ncpu):
            hosts.append(hostname)
    return hosts
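# A self-contained sketch of the load-sorting logic in _get_hosts() above,
# run against canned 'qhost'-style output (hostnames and numbers are made up).
def _demo_sort_hosts():
    lines = [
        'HOSTNAME  ARCH        NCPU  LOAD  MEMTOT  MEMUSE  SWAPTO  SWAPUS',
        '----------------------------------------------------------------',
        'hostA     lx24-amd64  4     3.20  16.0G   2.1G    2.0G    0.0',
        'hostB     lx24-amd64  2     0.40  8.0G    1.0G    2.0G    0.0',
        'hostC     lx24-amd64  8     -     32.0G   -       2.0G    -',
    ]
    loads = []
    for line in lines:
        if line.startswith(('HOSTNAME', '-')):
            continue
        hostname, arch, ncpu, load = line.split()[:4]
        try:
            loads.append((hostname, float(load) / int(ncpu), int(ncpu)))
        except ValueError:  # 'hostC' is down: load is '-'.
            continue
    # One entry per CPU, least-loaded hosts first.
    return [name
            for name, load, ncpu in sorted(loads, key=lambda item: item[1])
            for _ in range(ncpu)]

print _demo_sort_hosts()
# ['hostB', 'hostB', 'hostA', 'hostA', 'hostA', 'hostA']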
def test_errormsg(self):
    logging.debug('')
    logging.debug('test_errormsg')

    cmd = 'dir' if sys.platform == 'win32' else 'ls'
    try:
        proc = ShellProc(cmd, stdout='stdout', stderr='stderr')
        proc.wait()
    finally:
        if os.path.exists('stdout'):
            os.remove('stdout')
        if os.path.exists('stderr'):
            os.remove('stderr')

    msg = proc.error_message(-signal.SIGTERM)
    if sys.platform == 'win32':
        self.assertEqual(msg, '')
    else:
        self.assertEqual(msg, ': SIGTERM')
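# The assertions above rely on ShellProc.error_message() translating a
# negative return code into ': <signal name>' on POSIX. A minimal sketch of
# how such a message can be derived with the stdlib; the helper name is ours
# and it mirrors only the POSIX branch of the assertions.
import signal

def _signal_message(return_code):
    """ Return ': SIGXXX' for a negative `return_code`, else ''. """
    if return_code < 0:
        for name in dir(signal):
            if name.startswith('SIG') and not name.startswith('SIG_'):
                if getattr(signal, name) == -return_code:
                    return ': %s' % name
    return ''

assert _signal_message(-signal.SIGTERM) == ': SIGTERM'
assert _signal_message(0) == ''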
def execute_command(self, command, stdin, stdout, stderr, env_vars,
                    poll_delay, timeout):
    """
    Run `command` in a subprocess if this server's `allow_shell`
    attribute is True.

    command: string
        Command line to be executed.

    stdin, stdout, stderr: string
        Filenames for the corresponding stream.

    env_vars: dict
        Environment variables for the command.

    poll_delay: float (seconds)
        Delay between polling subprocess for completion.

    timeout: float (seconds)
        Maximum time to wait for command completion. A value of zero
        implies no timeout.
    """
    self._logger.debug('execute_command %r', command)
    if not self._allow_shell:
        self._logger.error('attempt to execute %r by %r',
                           command, get_credentials().user)
        raise RuntimeError('shell access is not allowed by this server')

    for arg in (stdin, stdout, stderr):
        if isinstance(arg, basestring):
            self._check_path(arg, 'execute_command')
    try:
        process = ShellProc(command, stdin, stdout, stderr, env_vars)
    except Exception as exc:
        self._logger.error('exception creating process: %s', exc)
        raise

    self._logger.debug('    PID = %d', process.pid)
    return_code, error_msg = process.wait(poll_delay, timeout)
    self._logger.debug('    returning %s', (return_code, error_msg))
    return (return_code, error_msg)
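# A self-contained sketch of what the server does internally, using plain
# subprocess in place of ShellProc (filename arguments become open files).
# This is an illustration of the semantics, not the ShellProc implementation.
import subprocess
import sys

def _run_with_files(command, stdin_name, stdout_name, stderr_name, env=None):
    """ Run `command` with the named files attached; return its return code. """
    with open(stdin_name, 'r') as stdin, \
         open(stdout_name, 'w') as stdout, \
         open(stderr_name, 'w') as stderr:
        return subprocess.call(command, shell=True, stdin=stdin,
                               stdout=stdout, stderr=stderr, env=env)

dev_null = 'nul:' if sys.platform == 'win32' else '/dev/null'
rc = _run_with_files('echo hello', dev_null, 'echo.out', 'echo.err')
print 'return code', rc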
def _execute_local(self):
    """ Run command. """
    self._logger.info('executing %s...', self.command)
    start_time = time.time()

    self._process = \
        ShellProc(self.command, self.stdin, self.stdout, self.stderr,
                  self.env_vars)
    self._logger.debug('PID = %d', self._process.pid)

    try:
        return_code, error_msg = \
            self._process.wait(self.poll_delay, self.timeout)
    finally:
        self._process.close_files()
        self._process = None

    et = time.time() - start_time
    if et >= 60:  # pragma no cover
        self._logger.info('elapsed time: %.1f sec.', et)

    return (return_code, error_msg)
def _unused_remote_port(address, port, user, identity):
    """ Return a (currently) unused port on `address`, default to `port`. """
    if '@' in address:
        user, host = address.split('@')
    else:
        user = user or getpass.getuser()
        host = address

    if sys.platform == 'win32':  # pragma no cover
        cmd = ['plink', '-batch', '-ssh']
    else:
        cmd = ['ssh']
    cmd += ['-l', user]
    if identity:
        cmd += ['-i', identity]
    cmd += ['-x', '-T']

    # FIXME: this currently won't work for Windows if ssh doesn't connect
    # to a UNIX-like shell (cygwin, etc.)
    # The embedded double quotes keep the '-c' body a single word for the
    # remote shell; the newlines below become ';' separators.
    code = '''"import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(('localhost', 0))
port = sock.getsockname()[1]
sock.close()
print 'port', port"'''

    cmd += [host, 'python', '-c', code.replace('\n', ';')]
    try:
        proc = ShellProc(cmd, stdout=PIPE, stderr=PIPE,
                         universal_newlines=True)
    except Exception as exc:
        logging.warning("Can't get unused port on %s from %s (forcing %s): %s",
                        host, cmd, port, exc)
        return port

    output = proc.stdout.read()
    for line in output.split('\n'):
        if line.startswith('port'):
            remote_port = int(line.split()[1])
            logging.debug('Unused remote port %s on %s', remote_port, host)
            return remote_port
    else:
        # 'else' runs only if no 'port' line was found in the output.
        logging.warning("Can't get unused port on %s from %s (forcing %s):\n"
                        "[stdout]\n%s\n[stderr]\n%s",
                        host, cmd, port, output, proc.stderr.read())
        return port
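# The probe that _unused_remote_port() runs on the remote host, runnable
# locally as-is: bind to port 0 and let the OS pick a currently unused port.
import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(('localhost', 0))
port = sock.getsockname()[1]
sock.close()
print 'port', port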
def _start_tunnel(address, port, args, user, identity, prefix):
    """ Start an ssh tunnel process. """
    if '@' in address:
        user, host = address.split('@')
    else:
        user = user or getpass.getuser()
        host = address

    if sys.platform == 'win32':  # pragma no cover
        cmd = ['plink', '-batch', '-ssh']
    else:
        cmd = ['ssh']
    cmd += ['-l', user]
    if identity:
        cmd += ['-i', identity]
    cmd += ['-N', '-x', '-T']  # plink doesn't support '-n' (no stdin)
    cmd += args + [host]

    logname = '%s-%s-%s.log' % (prefix, host, port)
    logname = os.path.join(os.getcwd(), logname)
    stdout = open(logname, 'w')

    tunnel_proc = None
    try:
        tunnel_proc = ShellProc(cmd, stdout=stdout, stderr=STDOUT)
    except Exception as exc:
        raise RuntimeError("Can't create ssh tunnel process from %s: %s"
                           % (cmd, exc))

    time.sleep(1)
    exitcode = tunnel_proc.poll()
    if exitcode is not None:
        raise RuntimeError('ssh tunnel process for %s:%s exited with exitcode'
                           ' %d, output in %s'
                           % (address, port, exitcode, logname))

    return (_cleanup_tunnel, tunnel_proc, stdout, logname, os.getpid())
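# Hedged usage sketch for _start_tunnel() above: forward local port 5000 to
# the remote host. The address is hypothetical, and _cleanup_tunnel() is
# assumed to accept the remaining elements of the returned tuple.
#
# cleanup_info = _start_tunnel('user@somehost', 5000,
#                              ['-L', '5000:localhost:5000'],
#                              user=None, identity=None, prefix='tunnel')
# atexit.register(*cleanup_info)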
def execute_command(self, resource_desc):
    """
    Submit command based on `resource_desc`.

    resource_desc: dict
        Description of command and required resources.

    The '-V' `qsub` option is always used to export the current environment
    to the job. This environment is first updated with any 'job_environment'
    data. The '-W block=true' `qsub` option is used to wait for job
    completion. Other job resource keys are processed as follows:

    ========================= ===========================
    Resource Key              Translation
    ========================= ===========================
    ``submit_as_hold``        -h
    ------------------------- ---------------------------
    ``rerunnable``            -r y|n
    ------------------------- ---------------------------
    ``working_directory``     Handled in generated script
    ------------------------- ---------------------------
    ``job_category``          Ignored
    ------------------------- ---------------------------
    ``min_cpus``              -l select= `value` :ncpus=1
    ------------------------- ---------------------------
    ``max_cpus``              Ignored
    ------------------------- ---------------------------
    ``min_phys_memory``       Ignored
    ------------------------- ---------------------------
    ``email``                 -M `value`
    ------------------------- ---------------------------
    ``email_on_started``      -m b
    ------------------------- ---------------------------
    ``email_on_terminated``   -m e
    ------------------------- ---------------------------
    ``job_name``              -N `value`
    ------------------------- ---------------------------
    ``input_path``            Handled in generated script
    ------------------------- ---------------------------
    ``output_path``           Handled in generated script
    ------------------------- ---------------------------
    ``error_path``            Handled in generated script
    ------------------------- ---------------------------
    ``join_files``            Handled in generated script
    ------------------------- ---------------------------
    ``reservation_id``        Ignored
    ------------------------- ---------------------------
    ``queue_name``            -q `value`
    ------------------------- ---------------------------
    ``priority``              -p `value`
    ------------------------- ---------------------------
    ``start_time``            -a `value`
    ------------------------- ---------------------------
    ``deadline_time``         Ignored
    ------------------------- ---------------------------
    ``accounting_id``         -W group_list= `value`
    ========================= ===========================

    Where `value` is the corresponding resource value.

    To support a working directory other than HOME or a PBS-generated
    scratch directory, a short script is written with PBS directives in the
    header. The script will change to the working directory and then run
    the command.

    If 'working_directory' is not specified, use current server directory.
    If 'input_path' is not specified, use ``/dev/null``.
    If 'output_path' is not specified, use ``<remote_command>.stdout``.
    If 'error_path' is not specified, use stdout.

    If 'native_specification' is specified, it is added to the `qsub`
    command just before the name of the generated script. If it contains
    a ``select`` clause, then that will prevent generation of a ``select``
    clause related to 'min_cpus'.

    Some resource limits are also handled:

    ==================== =========================
    Resource Key         Translation
    ==================== =========================
    ``core_file_size``   Ignored
    -------------------- -------------------------
    ``data_seg_size``    Ignored
    -------------------- -------------------------
    ``file_size``        Ignored
    -------------------- -------------------------
    ``open_files``       Ignored
    -------------------- -------------------------
    ``stack_size``       Ignored
    -------------------- -------------------------
    ``virtual_memory``   Ignored
    -------------------- -------------------------
    ``cpu_time``         Ignored
    -------------------- -------------------------
    ``wallclock_time``   -l walltime= `value`
    ==================== =========================

    Output from `qsub` itself is routed to ``qsub.out``. If the job
    reports an error, ``qsub.out`` will be appended to either `error_path`,
    or if that was not specified, stdout.
    """
    self.home_dir = os.path.expanduser('~')
    self.work_dir = ''

    cmd = list(self._QSUB)
    cmd.extend(('-V', '-W', 'block=true', '-j', 'oe'))
    if sys.platform == 'win32':  # pragma no cover
        prefix = 'REM PBS'
        cmd.extend(('-C', '"%s"' % prefix))
        suffix = '-qsub.bat'
    else:
        prefix = '#PBS'
        cmd.extend(('-S', '/bin/sh'))
        suffix = '.qsub'
    env = None
    inp, out, err = None, None, None
    join_files = False

    # Set working directory now, for possible path fixing.
    try:
        value = resource_desc['working_directory']
    except KeyError:
        pass
    else:
        self.work_dir = self._fix_path(value)

    # Write script to be submitted rather than putting everything on
    # 'qsub' command line. We have to do this since otherwise there's
    # no way to set an execution directory or input path.
    base = None
    if 'job_name' in resource_desc:
        base = self._jobname(resource_desc['job_name'])
    if not base:
        base = os.path.basename(resource_desc['remote_command'])
    script_name = '%s%s' % (base, suffix)

    native_specification = resource_desc.get('native_specification', [])

    with open(script_name, 'w') as script:
        if sys.platform == 'win32':  # pragma no cover
            script.write('@echo off\n')
        else:
            script.write('#!/bin/sh\n')

        # PBS (at least at NAS) requires 'group_list' be set.
        if 'accounting_id' in resource_desc:
            accounting_id = resource_desc['accounting_id']
        else:
            accounting_id = self.accounting_id
        script.write('%s -W group_list=%s\n'
                     % (prefix, accounting_id.strip()))

        # Process description in fixed, repeatable order.
        keys = ('submit_as_hold',
                'rerunnable',
                'job_environment',
                'min_cpus',
                'email',
                'email_on_started',
                'email_on_terminated',
                'job_name',
                'input_path',
                'output_path',
                'error_path',
                'join_files',
                'queue_name',
                'priority',
                'start_time')
        email_events = ''
        for key in keys:
            try:
                value = resource_desc[key]
            except KeyError:
                continue

            if key == 'submit_as_hold':
                if value:
                    script.write('%s -h\n' % prefix)
            elif key == 'rerunnable':
                script.write('%s -r %s\n' % (prefix, 'y' if value else 'n'))
            elif key == 'job_environment':
                env = value
            elif key == 'min_cpus':
                # Only write select clause if not in 'native_specification'.
                for arg in native_specification:
                    if 'select' in arg:
                        break
                else:
                    script.write('%s -l select=%d:ncpus=1\n'
                                 % (prefix, value))
            elif key == 'email':
                script.write('%s -M %s\n' % (prefix, ','.join(value)))
            elif key == 'email_on_started':
                email_events += 'b'
            elif key == 'email_on_terminated':
                email_events += 'e'
            elif key == 'job_name':
                value = value or base
                script.write('%s -N %s\n' % (prefix, self._jobname(value)))
            elif key == 'input_path':
                inp = value
            elif key == 'output_path':
                out = value
            elif key == 'error_path':
                err = value
            elif key == 'join_files':
                join_files = value
            elif key == 'queue_name':
                script.write('%s -q %s\n' % (prefix, value))
            elif key == 'priority':
                script.write('%s -p %d\n' % (prefix, value))
            elif key == 'start_time':
                script.write('%s -a %s\n'
                             % (prefix, value.strftime('%Y%m%d%H%M.%S')))

        if email_events:
            script.write('%s -m %s\n' % (prefix, email_events))

        # Set resource limits.
        if 'resource_limits' in resource_desc:
            limits = resource_desc['resource_limits']
            if 'wallclock_time' in limits:
                wall_time = limits['wallclock_time']
                script.write('%s -l walltime=%s\n'
                             % (prefix, self._timelimit(wall_time)))

        # Have script move to work directory relative to
        # home directory on execution host.
        home = os.path.realpath(os.path.expanduser('~'))
        work = os.path.realpath(self.work_dir or os.getcwd())
        if work.startswith(home):
            work = work[len(home) + 1:]
            if sys.platform == 'win32':  # pragma no cover
                script.write('cd %HOMEDRIVE%%HOMEPATH%\n')
            else:
                script.write('cd $HOME\n')
        else:
            # This can potentially cause problems...
            self._logger.warning('work %r not a descendant of home %r',
                                 work, home)
        if ' ' in work:
            work = '"%s"' % work
        script.write('cd %s\n' % work)

        script.write(self._fix_path(resource_desc['remote_command']))
        if 'args' in resource_desc:
            for arg in resource_desc['args']:
                arg = self._fix_path(arg)
                if ' ' in arg and arg[0] not in ('"', "'"):
                    arg = '"%s"' % arg
                script.write(' %s' % arg)

        script.write(' <%s' % (inp or DEV_NULL))
        script.write(' >%s' % (out or '%s.stdout' % base))
        if join_files or err is None:
            script.write(' 2>&1')
        else:
            script.write(' 2>%s' % err)
        script.write('\n')

    if sys.platform != 'win32':
        os.chmod(script_name, 0700)

    # Add 'escape' clause.
    cmd.extend(native_specification)

    with open(script_name, 'rU') as inp:
        self._logger.debug('%s:', script_name)
        for line in inp:
            self._logger.debug('    %s', line.rstrip())

    # Submit job.
    cmd.append(os.path.join('.', script_name))
    self._logger.info('%r', ' '.join(cmd))
    try:
        process = ShellProc(cmd, DEV_NULL, 'qsub.out', STDOUT, env)
    except Exception as exc:
        self._logger.error('exception creating process: %s', exc)
        if os.path.exists('qsub.out'):
            with open('qsub.out', 'rU') as inp:
                self._logger.error('qsub.out:')
                for line in inp:
                    self._logger.error('    %s', line.rstrip())
        raise

    # Submitted, wait for completion.
    self._logger.debug('    PID = %d', process.pid)
    return_code, error_msg = process.wait(1)
    self._logger.debug('    returning %s', (return_code, error_msg))

    if return_code and os.path.exists('qsub.out'):
        if join_files or err is None:
            qsub_echo = out or '%s.stdout' % base
        else:
            qsub_echo = err
        with open('qsub.out', 'rU') as inp:
            with open(qsub_echo, 'a+') as out:
                self._logger.error('qsub.out:')
                out.write('===== qsub.out =====\n')
                for line in inp:
                    self._logger.error('    %s', line.rstrip())
                    out.write(line)

    return (return_code, error_msg)
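# For reference, a hypothetical script generated by the method above on a
# POSIX host (names are made up; assumes _jobname('demo') == 'demo' and that
# _timelimit() renders one hour as '1:00:00'), given a work directory of
# $HOME/runs/demo, min_cpus=4, and accounting_id='g12345':
_SAMPLE_PBS_SCRIPT = '''\
#!/bin/sh
#PBS -W group_list=g12345
#PBS -l select=4:ncpus=1
#PBS -N demo
#PBS -l walltime=1:00:00
cd $HOME
cd runs/demo
mysolver -case 1 </dev/null >demo.stdout 2>&1
'''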
def execute_command(self, resource_desc):
    """
    Submit command based on `resource_desc`.

    resource_desc: dict
        Description of command and required resources.

    The '-V' `qsub` option is always used to export the current environment
    to the job. This environment is first updated with any 'job_environment'
    data. The '-sync yes' `qsub` option is used to wait for job completion.
    Other job resource keys are processed as follows:

    ========================= =========================
    Resource Key              Translation
    ========================= =========================
    ``submit_as_hold``        -h
    ------------------------- -------------------------
    ``rerunnable``            -r yes|no
    ------------------------- -------------------------
    ``working_directory``     -wd `value`
    ------------------------- -------------------------
    ``job_category``          Sets parallel environment
    ------------------------- -------------------------
    ``min_cpus``              Sets parallel environment
    ------------------------- -------------------------
    ``max_cpus``              Sets parallel environment
    ------------------------- -------------------------
    ``min_phys_memory``       Ignored
    ------------------------- -------------------------
    ``email``                 -M `value`
    ------------------------- -------------------------
    ``email_on_started``      -m b
    ------------------------- -------------------------
    ``email_on_terminated``   -m e
    ------------------------- -------------------------
    ``job_name``              -N `value`
    ------------------------- -------------------------
    ``input_path``            -i `value`
    ------------------------- -------------------------
    ``output_path``           -o `value`
    ------------------------- -------------------------
    ``error_path``            -e `value`
    ------------------------- -------------------------
    ``join_files``            -j yes|no
    ------------------------- -------------------------
    ``reservation_id``        -ar `value`
    ------------------------- -------------------------
    ``queue_name``            -q `value`
    ------------------------- -------------------------
    ``priority``              -p `value`
    ------------------------- -------------------------
    ``start_time``            -a `value`
    ------------------------- -------------------------
    ``deadline_time``         Ignored
    ------------------------- -------------------------
    ``accounting_id``         -A `value`
    ========================= =========================

    Where `value` is the corresponding resource value.

    If 'working_directory' is not specified, add ``-cwd``.
    If 'input_path' is not specified, add ``-i /dev/null``.
    If 'output_path' is not specified, add ``-o <remote_command>.stdout``.
    If 'error_path' is not specified, add ``-j yes``.

    If 'native_specification' is specified, it is added to the `qsub`
    command just before 'remote_command' and 'args'.

    If specified, 'job_category' is used to index into the category map set
    up during allocator configuration. The mapped category name as well as
    the 'min_cpus' and 'max_cpus' values are used with the ``-pe`` qsub
    option.

    Some resource limits are also handled:

    ==================== =========================
    Resource Key         Translation
    ==================== =========================
    ``core_file_size``   Ignored
    -------------------- -------------------------
    ``data_seg_size``    Ignored
    -------------------- -------------------------
    ``file_size``        Ignored
    -------------------- -------------------------
    ``open_files``       Ignored
    -------------------- -------------------------
    ``stack_size``       Ignored
    -------------------- -------------------------
    ``virtual_memory``   Ignored
    -------------------- -------------------------
    ``cpu_time``         -l h_cpu= `value`
    -------------------- -------------------------
    ``wallclock_time``   -l h_rt= `value`
    ==================== =========================

    Output from `qsub` itself is routed to ``qsub.out``.
    """
    self.home_dir = os.path.expanduser('~')
    self.work_dir = ''

    cmd = list(self._QSUB)
    cmd.extend(('-V', '-sync', 'yes', '-b', 'yes'))
    env = None
    inp, out, err = None, None, None

    # Set working directory now, for possible path fixing.
    try:
        value = resource_desc['working_directory']
    except KeyError:
        pass
    else:
        self.work_dir = self._fix_path(value)
        cmd.extend(('-wd', value))

    # Process description in fixed, repeatable order.
    keys = ('submit_as_hold',
            'rerunnable',
            'job_environment',
            'email',
            'email_on_started',
            'email_on_terminated',
            'job_name',
            'input_path',
            'output_path',
            'error_path',
            'join_files',
            'reservation_id',
            'queue_name',
            'priority',
            'start_time',
            'accounting_id')
    email_events = ''
    for key in keys:
        try:
            value = resource_desc[key]
        except KeyError:
            continue

        if key == 'submit_as_hold':
            if value:
                cmd.append('-h')
        elif key == 'rerunnable':
            cmd.extend(('-r', 'yes' if value else 'no'))
        elif key == 'job_environment':
            env = value
        elif key == 'email':
            cmd.extend(('-M', ','.join(value)))
        elif key == 'email_on_started':
            email_events += 'b'
        elif key == 'email_on_terminated':
            email_events += 'e'
        elif key == 'job_name':
            if value:
                cmd.extend(('-N', self._jobname(value)))
        elif key == 'input_path':
            cmd.extend(('-i', self._fix_path(value)))
            inp = value
        elif key == 'output_path':
            cmd.extend(('-o', self._fix_path(value)))
            out = value
        elif key == 'error_path':
            cmd.extend(('-e', self._fix_path(value)))
            err = value
        elif key == 'join_files':
            cmd.extend(('-j', 'yes' if value else 'no'))
            if value:
                err = 'yes'
        elif key == 'reservation_id':
            cmd.extend(('-ar', value))
        elif key == 'queue_name':
            cmd.extend(('-q', value))
        elif key == 'priority':
            cmd.extend(('-p', str(value)))
        elif key == 'start_time':
            cmd.extend(('-a', value.strftime('%Y%m%d%H%M.%S')))
        elif key == 'accounting_id':
            cmd.extend(('-A', value))

    if email_events:
        cmd.extend(('-m', email_events))

    # Setup parallel environment.
    if 'job_category' in resource_desc:
        job_category = resource_desc['job_category']
        try:
            parallel_environment = self.category_map[job_category]
        except KeyError:
            msg = 'No mapping for job_category %r' % job_category
            self._logger.error(msg)
            raise ValueError(msg)
        min_cpus = resource_desc.get('min_cpus', 1)
        max_cpus = resource_desc.get('max_cpus', min_cpus)
        cmd.extend(('-pe', parallel_environment,
                    '%d-%d' % (min_cpus, max_cpus)))

    # Set resource limits.
    if 'resource_limits' in resource_desc:
        limits = resource_desc['resource_limits']
        if 'cpu_time' in limits:
            cpu_time = limits['cpu_time']
            cmd.extend(('-l', 'h_cpu=%s' % self._timelimit(cpu_time)))
        if 'wallclock_time' in limits:
            wall_time = limits['wallclock_time']
            cmd.extend(('-l', 'h_rt=%s' % self._timelimit(wall_time)))

    # Set default command configuration.
    if not self.work_dir:
        cmd.append('-cwd')
    if inp is None:
        cmd.extend(('-i', DEV_NULL))
    if out is None:
        base = os.path.basename(resource_desc['remote_command'])
        cmd.extend(('-o', '%s.stdout' % base))
    if err is None:
        cmd.extend(('-j', 'yes'))

    # Add 'escape' clause.
    if 'native_specification' in resource_desc:
        cmd.extend(resource_desc['native_specification'])

    cmd.append(self._fix_path(resource_desc['remote_command']))
    if 'args' in resource_desc:
        for arg in resource_desc['args']:
            cmd.append(self._fix_path(arg))

    self._logger.info('%r', ' '.join(cmd))
    try:
        process = ShellProc(cmd, DEV_NULL, 'qsub.out', STDOUT, env)
    except Exception as exc:
        self._logger.error('exception creating process: %s', exc)
        raise

    self._logger.debug('    PID = %d', process.pid)
    return_code, error_msg = process.wait(1)
    self._logger.debug('    returning %s', (return_code, error_msg))
    return (return_code, error_msg)
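# For reference, a hypothetical qsub command line built by the method above
# (assumes self._QSUB == ['qsub'], DEV_NULL == '/dev/null', and that
# _timelimit() renders one hour as '1:00:00') for:
# resource_desc = {'remote_command': 'mysolver', 'job_name': 'demo',
#                  'queue_name': 'debug',
#                  'resource_limits': {'wallclock_time': 3600}}
_SAMPLE_SGE_CMD = ['qsub', '-V', '-sync', 'yes', '-b', 'yes',
                   '-N', 'demo', '-q', 'debug',
                   '-l', 'h_rt=1:00:00',
                   '-cwd', '-i', '/dev/null',
                   '-o', 'mysolver.stdout', '-j', 'yes',
                   'mysolver']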
def execute_command(self, resource_desc):
    """
    Submit command based on `resource_desc`.

    resource_desc: dict
        Description of command and required resources.

    The '-V' `qsub` option is always used to export the current environment
    to the job. This environment is first updated with any 'job_environment'
    data. The '-sync yes' `qsub` option is used to wait for job completion.
    Other job resource keys are processed as follows:

    ========================= =========================
    Resource Key              Translation
    ========================= =========================
    ``submit_as_hold``        -h
    ------------------------- -------------------------
    ``rerunnable``            -r yes|no
    ------------------------- -------------------------
    ``working_directory``     Ignored
    ------------------------- -------------------------
    ``job_category``          Sets parallel environment
    ------------------------- -------------------------
    ``min_cpus``              Sets parallel environment
    ------------------------- -------------------------
    ``max_cpus``              Sets parallel environment
    ------------------------- -------------------------
    ``min_phys_memory``       Ignored
    ------------------------- -------------------------
    ``email``                 -M `value`
    ------------------------- -------------------------
    ``email_on_started``      -m b
    ------------------------- -------------------------
    ``email_on_terminated``   -m e
    ------------------------- -------------------------
    ``job_name``              -N `value`
    ------------------------- -------------------------
    ``input_path``            -i `value`
    ------------------------- -------------------------
    ``output_path``           -o `value`
    ------------------------- -------------------------
    ``error_path``            -e `value`
    ------------------------- -------------------------
    ``join_files``            -j yes|no
    ------------------------- -------------------------
    ``reservation_id``        -ar `value`
    ------------------------- -------------------------
    ``queue_name``            -q `value`
    ------------------------- -------------------------
    ``priority``              -p `value`
    ------------------------- -------------------------
    ``start_time``            -a `value`
    ------------------------- -------------------------
    ``deadline_time``         Ignored
    ------------------------- -------------------------
    ``accounting_id``         -A `value`
    ========================= =========================

    Where `value` is the corresponding resource value.

    The 'working_directory' key is ignored since the server has been
    started in this directory. ``-cwd`` is used in the `qsub` command.
    If 'input_path' is not specified, add ``-i /dev/null``.
    If 'output_path' is not specified, add ``-o <remote_command>.stdout``.
    If 'error_path' is not specified, add ``-j yes``.

    If 'native_specification' is specified, it is added to the `qsub`
    command just before 'remote_command' and 'args'.

    If specified, 'job_category' is used to index into the category map set
    up during allocator configuration. The mapped category name as well as
    the 'min_cpus' and 'max_cpus' values are used with the ``-pe`` qsub
    option.

    Some resource limits are also handled:

    ==================== =========================
    Resource Key         Translation
    ==================== =========================
    ``core_file_size``   Ignored
    -------------------- -------------------------
    ``data_seg_size``    Ignored
    -------------------- -------------------------
    ``file_size``        Ignored
    -------------------- -------------------------
    ``open_files``       Ignored
    -------------------- -------------------------
    ``stack_size``       Ignored
    -------------------- -------------------------
    ``virtual_memory``   Ignored
    -------------------- -------------------------
    ``cpu_time``         -l h_cpu= `value`
    -------------------- -------------------------
    ``wallclock_time``   -l h_rt= `value`
    ==================== =========================

    Output from `qsub` itself is routed to ``qsub.out``.
    """
    self.home_dir = os.path.expanduser("~")
    self.work_dir = os.getcwd()  # Server started in working directory.

    cmd = list(self._QSUB)
    cmd.extend(("-V", "-sync", "yes", "-b", "yes", "-cwd"))
    env = None
    inp, out, err = None, None, None

    # Process description in fixed, repeatable order.
    keys = (
        "submit_as_hold",
        "rerunnable",
        "job_environment",
        "email",
        "email_on_started",
        "email_on_terminated",
        "job_name",
        "input_path",
        "output_path",
        "error_path",
        "join_files",
        "reservation_id",
        "queue_name",
        "priority",
        "start_time",
        "accounting_id",
    )
    email_events = ""
    for key in keys:
        try:
            value = resource_desc[key]
        except KeyError:
            continue

        if key == "submit_as_hold":
            if value:
                cmd.append("-h")
        elif key == "rerunnable":
            cmd.extend(("-r", "yes" if value else "no"))
        elif key == "job_environment":
            env = value
        elif key == "email":
            cmd.extend(("-M", ",".join(value)))
        elif key == "email_on_started":
            email_events += "b"
        elif key == "email_on_terminated":
            email_events += "e"
        elif key == "job_name":
            if value:
                cmd.extend(("-N", self._jobname(value)))
        elif key == "input_path":
            cmd.extend(("-i", self._fix_path(value)))
            inp = value
        elif key == "output_path":
            cmd.extend(("-o", self._fix_path(value)))
            out = value
        elif key == "error_path":
            cmd.extend(("-e", self._fix_path(value)))
            err = value
        elif key == "join_files":
            cmd.extend(("-j", "yes" if value else "no"))
            if value:
                err = "yes"
        elif key == "reservation_id":
            cmd.extend(("-ar", value))
        elif key == "queue_name":
            cmd.extend(("-q", value))
        elif key == "priority":
            cmd.extend(("-p", str(value)))
        elif key == "start_time":
            cmd.extend(("-a", value.strftime("%Y%m%d%H%M.%S")))
        elif key == "accounting_id":
            cmd.extend(("-A", value))

    if email_events:
        cmd.extend(("-m", email_events))

    # Setup parallel environment.
    if "job_category" in resource_desc:
        job_category = resource_desc["job_category"]
        try:
            parallel_environment = self.category_map[job_category]
        except KeyError:
            msg = "No mapping for job_category %r" % job_category
            self._logger.error(msg)
            raise ValueError(msg)
        min_cpus = resource_desc.get("min_cpus", 1)
        max_cpus = resource_desc.get("max_cpus", min_cpus)
        cmd.extend(("-pe", parallel_environment,
                    "%d-%d" % (min_cpus, max_cpus)))

    # Set resource limits.
    if "resource_limits" in resource_desc:
        limits = resource_desc["resource_limits"]
        if "cpu_time" in limits:
            cpu_time = limits["cpu_time"]
            cmd.extend(("-l", "h_cpu=%s" % self._timelimit(cpu_time)))
        if "wallclock_time" in limits:
            wall_time = limits["wallclock_time"]
            cmd.extend(("-l", "h_rt=%s" % self._timelimit(wall_time)))

    # Set default command configuration.
    if inp is None:
        cmd.extend(("-i", DEV_NULL))
    if out is None:
        base = os.path.basename(resource_desc["remote_command"])
        cmd.extend(("-o", "%s.stdout" % base))
    if err is None:
        cmd.extend(("-j", "yes"))

    # Add 'escape' clause.
    if "native_specification" in resource_desc:
        cmd.extend(resource_desc["native_specification"])

    cmd.append(self._fix_path(resource_desc["remote_command"]))
    if "args" in resource_desc:
        for arg in resource_desc["args"]:
            cmd.append(self._fix_path(arg))

    self._logger.info("%r", " ".join(cmd))
    try:
        process = ShellProc(cmd, DEV_NULL, "qsub.out", STDOUT, env)
    except Exception as exc:
        self._logger.error("exception creating process: %s", exc)
        raise

    self._logger.debug("    PID = %d", process.pid)
    return_code, error_msg = process.wait(1)
    self._logger.debug("    returning %s", (return_code, error_msg))
    return (return_code, error_msg)
def start_server(authkey='PublicKey', address=None, port=0, prefix='server',
                 allowed_hosts=None, allowed_users=None, allow_shell=False,
                 allowed_types=None, timeout=None, tunnel=False,
                 resources=None, log_prefix=None):
    """
    Start an :class:`ObjServerFactory` service in a separate process
    in the current directory.

    authkey: string
        Authorization key; must be matched by clients.

    address: string
        IPv4 address, hostname, or pipe name.
        Default is the host's default IPv4 address.

    port: int
        Server port (default of 0 implies next available port).
        Note that ports below 1024 typically require special privileges.
        If port is negative, then a local pipe is used for communication.

    prefix: string
        Prefix for server config file and stdout/stderr file.

    allowed_hosts: list(string)
        Host address patterns to check against. Required if `port` >= 0.
        Ignored if `allowed_users` is specified.

    allowed_users: dict
        Dictionary of users and corresponding public keys allowed access.
        If None, *any* user may access. If empty, no user may access.
        The host portions of user strings are used for address patterns.

    allow_shell: bool
        If True, :meth:`execute_command` and :meth:`load_model` are allowed.
        Use with caution!

    allowed_types: list(string)
        Names of types which may be created. If None, then allow types
        listed by :meth:`get_available_types`. If empty, no types are
        allowed.

    timeout: int
        Seconds to wait for server to start. Note that public key
        generation can take a while. The default value of None will use
        an internally computed value based on host type (and for Windows,
        the availability of pyWin32).

    tunnel: bool
        If True, report host IP address but listen for connections from a
        local SSH tunnel.

    resources: string
        Filename for resource configuration.

    log_prefix: string
        Name used to identify remote logging messages from the server.
        Implies that the local process will be receiving the messages.

    Returns ``(server_proc, config_filename)``.
    """
    if timeout is None:
        if sys.platform == 'win32' and not HAVE_PYWIN32:  # pragma no cover
            timeout = 120
        else:
            timeout = 30

    server_key = prefix + '.key'
    server_cfg = prefix + '.cfg'
    server_out = prefix + '.out'
    for path in (server_cfg, server_out):
        if os.path.exists(path):
            os.remove(path)

    with open(server_key, 'w') as out:
        out.write('%s\n' % authkey)

    factory_path = pkg_resources.resource_filename('openmdao.main',
                                                   'objserverfactory.py')

    args = ['python', factory_path,
            '--port', str(port), '--prefix', prefix]
    if address is not None:
        args.extend(['--address', address])
    if tunnel:
        args.append('--tunnel')
    if resources is not None:
        args.append('--resources')
        args.append(resources)
    if allowed_users is not None:
        write_authorized_keys(allowed_users, 'users.allow',
                              logging.getLogger())
        args.extend(['--users', 'users.allow'])
    else:
        args.append('--allow-public')
    if port >= 0:
        if allowed_hosts is None:
            allowed_hosts = [socket.gethostbyname(socket.gethostname())]
        if allowed_hosts[0].startswith('127.') and \
           '127.0.0.1' not in allowed_hosts:
            allowed_hosts.append('127.0.0.1')
        with open('hosts.allow', 'w') as out:
            for pattern in allowed_hosts:
                out.write('%s\n' % pattern)
        if sys.platform != 'win32' or HAVE_PYWIN32:
            make_private('hosts.allow')
        else:  # pragma no cover
            logging.warning("Can't make hosts.allow private")
    if allow_shell:
        args.append('--allow-shell')
    if allowed_types is not None:
        with open('types.allow', 'w') as out:
            for typname in allowed_types:
                out.write('%s\n' % typname)
        if sys.platform != 'win32' or HAVE_PYWIN32:
            make_private('types.allow')
        else:  # pragma no cover
            logging.warning("Can't make types.allow private")
        args.extend(['--types', 'types.allow'])
    if log_prefix is not None:
        log_host = socket.gethostname()
        log_port = logging_port(log_host, log_host)
        args.extend(['--log-host', log_host, '--log-port', str(log_port)])
        if log_prefix:  # Could be null (for default).
            args.extend(['--log-prefix', log_prefix])

    proc = ShellProc(args, stdout=server_out, stderr=STDOUT)

    try:
        # Wait for valid server_cfg file.
        retry = 0
        while (not os.path.exists(server_cfg)) or \
              (os.path.getsize(server_cfg) == 0):
            return_code = proc.poll()
            if return_code:
                error_msg = proc.error_message(return_code)
                raise RuntimeError('Server startup failed %s' % error_msg)
            retry += 1
            if retry < 10 * timeout:
                time.sleep(.1)
            # Hard to cause a startup timeout.
            else:  # pragma no cover
                proc.terminate(timeout)
                raise RuntimeError('Server startup timeout')
        return (proc, server_cfg)
    finally:
        if os.path.exists(server_key):
            os.remove(server_key)
class ExternalCode(ComponentWithDerivatives):
    """ Run an external code as a component. """

    PIPE = subprocess.PIPE
    STDOUT = subprocess.STDOUT

    # pylint: disable-msg=E1101
    command = Str('', desc='The command to be executed.')
    env_vars = Dict({}, iotype='in',
                    desc='Environment variables required by the command.')
    resources = Dict({}, iotype='in',
                     desc='Resources required to run this component.')
    poll_delay = Float(0., low=0., units='s', iotype='in',
                       desc='Delay between polling for command completion.'
                            ' A value of zero will use an internally'
                            ' computed default.')
    timeout = Float(0., low=0., iotype='in', units='s',
                    desc='Maximum time to wait for command completion.'
                         ' A value of zero implies an infinite wait.')
    timed_out = Bool(False, iotype='out',
                     desc='True if the command timed-out.')
    return_code = Int(0, iotype='out',
                      desc='Return code from the command.')

    def __init__(self, *args, **kwargs):
        super(ExternalCode, self).__init__(*args, **kwargs)
        self.stdin = None
        self.stdout = None
        self.stderr = "error.out"
        self._process = None
        self._server = None

    # This gets used by remote server.
    def get_access_controller(self):  # pragma no cover
        """ Return :class:`AccessController` for this object. """
        return _AccessController()

    @rbac(('owner', 'user'))
    def set(self, path, value, index=None, src=None, force=False):
        """ Don't allow setting of 'command' by remote client. """
        if path in ('command', 'get_access_controller') and remote_access():
            self.raise_exception('%r may not be set() remotely' % path,
                                 RuntimeError)
        return super(ExternalCode, self).set(path, value, index, src, force)

    def execute(self):
        """
        Runs the specified command.

        First removes existing output files (but not files flagged as both
        input and output). Then if `resources` have been specified, an
        appropriate server is allocated and the command is run on that
        server. Otherwise the command is run locally.
        """
        self.return_code = -12345678
        self.timed_out = False

        for metadata in self.external_files:
            if metadata.get('output', False) and \
               not metadata.get('input', False):
                for path in glob.glob(metadata.path):
                    if os.path.exists(path):
                        os.remove(path)

        if not self.command:
            self.raise_exception('Null command line', ValueError)

        return_code = None
        error_msg = ''
        try:
            if self.resources:
                return_code, error_msg = self._execute_remote()
            else:
                return_code, error_msg = self._execute_local()

            if return_code is None:
                if self._stop:
                    self.raise_exception('Run stopped', RunStopped)
                else:
                    self.timed_out = True
                    self.raise_exception('Timed out', RunInterrupted)
            elif return_code:
                if isinstance(self.stderr, str):
                    stderrfile = open(self.stderr, 'r')
                    error_desc = stderrfile.read()
                    stderrfile.close()
                    err_fragment = "\nError Output:\n%s" % error_desc
                else:
                    err_fragment = error_msg
                self.raise_exception('return_code = %d%s'
                                     % (return_code, err_fragment),
                                     RuntimeError)
        finally:
            self.return_code = -999999 if return_code is None else return_code

    def _execute_local(self):
        """ Run command. """
        self._logger.info("executing '%s'...", self.command)
        start_time = time.time()

        self._process = \
            ShellProc(self.command, self.stdin, self.stdout, self.stderr,
                      self.env_vars)
        self._logger.debug('PID = %d', self._process.pid)

        try:
            return_code, error_msg = \
                self._process.wait(self.poll_delay, self.timeout)
        finally:
            self._process.close_files()
            self._process = None

        et = time.time() - start_time
        if et >= 60:  # pragma no cover
            self._logger.info('elapsed time: %.1f sec.', et)

        return (return_code, error_msg)

    def _execute_remote(self):
        """
        Allocate a server based on required resources, send inputs,
        run command, and retrieve results.
        """
        # Allocate server.
        self._server, server_info = RAM.allocate(self.resources)
        if self._server is None:
            self.raise_exception('Server allocation failed :-(',
                                 RuntimeError)

        return_code = -88888888
        error_msg = ''
        try:
            # Send inputs.
            patterns = []
            for metadata in self.external_files:
                if metadata.get('input', False):
                    patterns.append(metadata.path)
            if patterns:
                self._send_inputs(patterns)
            else:
                self._logger.debug("No input metadata paths")

            # Run command.
            self._logger.info("executing '%s'...", self.command)
            start_time = time.time()
            return_code, error_msg = \
                self._server.execute_command(self.command,
                                             self.stdin, self.stdout,
                                             self.stderr, self.env_vars,
                                             self.poll_delay, self.timeout)
            et = time.time() - start_time
            if et >= 60:  # pragma no cover
                self._logger.info('elapsed time: %f sec.', et)

            # Retrieve results.
            patterns = []
            for metadata in self.external_files:
                if metadata.get('output', False):
                    patterns.append(metadata.path)
            if patterns:
                self._retrieve_results(patterns)
            else:
                self._logger.debug("No output metadata paths")
        finally:
            RAM.release(self._server)
            self._server = None

        return (return_code, error_msg)

    def _send_inputs(self, patterns):
        """ Sends input files matching `patterns`. """
        self._logger.info('sending inputs...')
        start_time = time.time()

        filename = 'inputs.zip'
        pfiles, pbytes = pack_zipfile(patterns, filename, self._logger)
        try:
            filexfer(None, filename, self._server, filename, 'b')
            ufiles, ubytes = self._server.unpack_zipfile(filename)
        finally:
            os.remove(filename)

        # Difficult to force file transfer error.
        if ufiles != pfiles or ubytes != pbytes:  # pragma no cover
            msg = 'Inputs xfer error: %d:%d vs. %d:%d' \
                  % (ufiles, ubytes, pfiles, pbytes)
            self.raise_exception(msg, RuntimeError)

        et = time.time() - start_time
        if et >= 60:  # pragma no cover
            self._logger.info('elapsed time: %f sec.', et)

    def _retrieve_results(self, patterns):
        """ Retrieves result files matching `patterns`. """
        self._logger.info('retrieving results...')
        start_time = time.time()

        filename = 'outputs.zip'
        pfiles, pbytes = self._server.pack_zipfile(tuple(patterns), filename)
        try:
            filexfer(self._server, filename, None, filename, 'b')
            ufiles, ubytes = unpack_zipfile(filename, self._logger)
        finally:
            os.remove(filename)

        # Difficult to force file transfer error.
        if ufiles != pfiles or ubytes != pbytes:  # pragma no cover
            msg = 'Results xfer error: %d:%d vs. %d:%d' \
                  % (ufiles, ubytes, pfiles, pbytes)
            self.raise_exception(msg, RuntimeError)

        et = time.time() - start_time
        if et >= 60:  # pragma no cover
            self._logger.info('elapsed time: %f sec.', et)

    def stop(self):
        """ Stop the external code. """
        self._stop = True
        if self._process:
            self._process.terminate()

    def copy_inputs(self, inputs_dir, patterns):
        """
        Copy inputs from `inputs_dir` that match `patterns`.

        inputs_dir: string
            Directory to copy files from. Relative paths are evaluated
            from the component's execution directory.

        patterns: list or string
            One or more :mod:`glob` patterns to match against.

        This can be useful for resetting problem state.
        """
        self._logger.info('copying initial inputs from %s...', inputs_dir)
        with self.dir_context:
            if not os.path.exists(inputs_dir):
                self.raise_exception("inputs_dir '%s' does not exist"
                                     % inputs_dir, RuntimeError)
            self._copy(inputs_dir, patterns)

    def copy_results(self, results_dir, patterns):
        """
        Copy files from `results_dir` that match `patterns`.

        results_dir: string
            Directory to copy files from. Relative paths are evaluated
            from the component's execution directory.

        patterns: list or string
            One or more :mod:`glob` patterns to match against.

        This can be useful for workflow debugging when the external
        code takes a long time to execute.
        """
        self._logger.info('copying precomputed results from %s...',
                          results_dir)
        with self.dir_context:
            if not os.path.exists(results_dir):
                self.raise_exception("results_dir '%s' does not exist"
                                     % results_dir, RuntimeError)
            self._copy(results_dir, patterns)

    def _copy(self, directory, patterns):
        """
        Copy files from `directory` that match `patterns`
        to the current directory and ensure they are writable.

        directory: string
            Directory to copy files from.

        patterns: list or string
            One or more :mod:`glob` patterns to match against.
        """
        if isinstance(patterns, basestring):
            patterns = [patterns]
        for pattern in patterns:
            pattern = os.path.join(directory, pattern)
            for src_path in sorted(glob.glob(pattern)):
                dst_path = os.path.basename(src_path)
                self._logger.debug('    %s', src_path)
                shutil.copy(src_path, dst_path)
                # Ensure writable.
                mode = os.stat(dst_path).st_mode
                mode |= stat.S_IWUSR
                os.chmod(dst_path, mode)
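# Hedged usage sketch for ExternalCode, assuming the usual OpenMDAO
# Component.run() entry point; the solver command and file names are
# hypothetical.
#
# ext = ExternalCode()
# ext.command = 'mysolver input.dat'
# ext.stdout = 'mysolver.log'
# ext.timeout = 600.           # give up after ten minutes
# ext.run()                    # raises RuntimeError on nonzero return code
# print ext.return_code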
def execute_command(self, resource_desc):
    """
    Run command described by `resource_desc` in a subprocess if this
    server's `allow_shell` attribute is True.

    resource_desc: dict
        Contains job description.

    The current environment, along with any 'job_environment'
    specification, is in effect while running 'remote_command'.

    If 'input_path' is not specified, ``/dev/null`` or ``nul:`` is used.
    If 'output_path' is not specified, ``<remote_command>.stdout`` is used.
    If neither 'error_path' nor 'join_files' is specified,
    ``<remote_command>.stderr`` is used.

    If specified in the 'resource_limits' dictionary, 'wallclock_time'
    is used as a timeout.

    All other queuing resource keys are ignored. The ``HOME_DIRECTORY``
    and ``WORKING_DIRECTORY`` placeholders are ignored.
    """
    try:
        job_name = resource_desc['job_name']
    except KeyError:
        job_name = ''

    command = resource_desc['remote_command']
    self._check_path(command, 'execute_command')
    base = os.path.basename(command)
    command = [command]
    if 'args' in resource_desc:
        command.extend(resource_desc['args'])

    self._logger.debug('execute_command %s %r', job_name, command)
    if not self._allow_shell:
        self._logger.error('attempt to execute %r by %r',
                           command, get_credentials().user)
        raise RuntimeError('shell access is not allowed by this server')

    env_vars = resource_desc.get('job_environment')

    try:
        stdin = resource_desc['input_path']
        self._check_path(stdin, 'execute_command')
    except KeyError:
        stdin = DEV_NULL
    try:
        stdout = resource_desc['output_path']
        self._check_path(stdout, 'execute_command')
    except KeyError:
        stdout = base + '.stdout'
    try:
        stderr = resource_desc['error_path']
        self._check_path(stderr, 'execute_command')
    except KeyError:
        try:
            join_files = resource_desc['join_files']
        except KeyError:
            stderr = base + '.stderr'
        else:
            stderr = STDOUT if join_files else base + '.stderr'

    limits = resource_desc.get('resource_limits', {})
    timeout = limits.get('wallclock_time', 0)
    poll_delay = 1

    try:
        process = ShellProc(command, stdin, stdout, stderr, env_vars)
    except Exception as exc:
        self._logger.error('exception creating process: %s', exc)
        raise

    self._logger.debug('    PID = %d', process.pid)
    return_code, error_msg = process.wait(poll_delay, timeout)
    self._logger.debug('    returning %s', (return_code, error_msg))
    return (return_code, error_msg)
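# A hypothetical `resource_desc` for the method above; only keys this server
# honors are shown (all other queuing keys are ignored):
_SAMPLE_RESOURCE_DESC = {
    'remote_command': 'mysolver',
    'args': ['-case', '1'],
    'job_environment': {'OMP_NUM_THREADS': '4'},
    'input_path': 'case1.inp',
    'join_files': True,                            # merge stderr into stdout
    'resource_limits': {'wallclock_time': 3600},   # used as the wait timeout
}
# With no 'output_path' given, output goes to 'mysolver.stdout'.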
def execute_command(self, resource_desc): """ Submit command based on `resource_desc`. resource_desc: dict Description of command and required resources. The '-V' `qsub` option is always used to export the current environment to the job. This environment is first updated with any 'job_environment' data. The '-W block=true' `qsub` option is used to wait for job completion. Other job resource keys are processed as follows: ========================= =========================== Resource Key Translation ========================= =========================== submit_as_hold -h ------------------------- --------------------------- rerunnable -r y|n ------------------------- --------------------------- working_directory Handled in generated script ------------------------- --------------------------- job_category Ignored ------------------------- --------------------------- min_cpus -l select= `value` :ncpus=1 ------------------------- --------------------------- max_cpus Ignored ------------------------- --------------------------- min_phys_memory Ignored ------------------------- --------------------------- email -M `value` ------------------------- --------------------------- email_on_started -m b ------------------------- --------------------------- email_on_terminated -m e ------------------------- --------------------------- job_name -N `value` ------------------------- --------------------------- input_path Handled in generated script ------------------------- --------------------------- output_path Handled in generated script ------------------------- --------------------------- error_path Handled in generated script ------------------------- --------------------------- join_files Handled in generated script ------------------------- --------------------------- reservation_id Ignored ------------------------- --------------------------- queue_name -q `value` ------------------------- --------------------------- priority -p `value` ------------------------- --------------------------- start_time -a `value` ------------------------- --------------------------- deadline_time Ignored ------------------------- --------------------------- accounting_id -W group_list= `value` ========================= =========================== Where `value` is the corresponding resource value. In order to support a working directory other than HOME or a PBS-generated scratch directory, a short script is written with PBS directives in the header. The script will change to the working directory and then run the command. If 'working_directory' is not specified, use current server directory. If 'input_path' is not specified, use ``/dev/null``. If 'output_path' is not specified, use ``<remote_command>.stdout``. If 'error_path' is not specified, use stdout. If 'native_specification' is specified, it is added to the `qsub` command just before the name of the generated script. If it contains a ``select`` clause, then that will prevent generation of a ``select`` clause related to 'min_cpus'. 
Some resource limits are also handled: ==================== ========================= Resource Key Translation ==================== ========================= core_file_size Ignored -------------------- ------------------------- data_seg_size Ignored -------------------- ------------------------- file_size Ignored -------------------- ------------------------- open_files Ignored -------------------- ------------------------- stack_size Ignored -------------------- ------------------------- virtual_memory Ignored -------------------- ------------------------- cpu_time Ignored -------------------- ------------------------- wallclock_time -l walltime= `value` ==================== ========================= Output from `qsub` itself is routed to ``qsub.out``. """ self.home_dir = os.path.expanduser('~') self.work_dir = '' cmd = list(self._QSUB) cmd.extend(('-V', '-W', 'block=true', '-j', 'oe')) if sys.platform == 'win32': # pragma no cover prefix = 'REM PBS' cmd.extend(('-C', '"%s"' % prefix)) suffix = '-qsub.bat' else: prefix = '#PBS' cmd.extend(('-S', '/bin/sh')) suffix = '.qsub' env = None inp, out, err = None, None, None join_files = False # Set working directory now, for possible path fixing. try: value = resource_desc['working_directory'] except KeyError: pass else: self.work_dir = self._fix_path(value) # Write script to be submitted rather than putting everything on # 'qsub' command line. We have to do this since otherwise there's # no way to set an execution directory or input path. if 'job_name' in resource_desc: base = self._jobname(resource_desc['job_name']) else: base = os.path.basename(resource_desc['remote_command']) script_name = '%s%s' % (base, suffix) native_specification = resource_desc.get('native_specification', []) with open(script_name, 'w') as script: if sys.platform == 'win32': # pragma no cover script.write('@echo off\n') else: script.write('#!/bin/sh\n') # PBS (at least at NAS) requires 'group_list' be set. if 'accounting_id' in resource_desc: accounting_id = resource_desc['accounting_id'] else: accounting_id = self.accounting_id script.write('%s -W group_list=%s\n' % (prefix, accounting_id.strip())) # Process description in fixed, repeatable order. keys = ('submit_as_hold', 'rerunnable', 'job_environment', 'min_cpus', 'email', 'email_on_started', 'email_on_terminated', 'job_name', 'input_path', 'output_path', 'error_path', 'join_files', 'queue_name', 'priority', 'start_time') email_events = '' for key in keys: try: value = resource_desc[key] except KeyError: continue if key == 'submit_as_hold': if value: script.write('%s -h\n' % prefix) elif key == 'rerunnable': script.write('%s -r %s\n' % (prefix, 'y' if value else 'n')) elif key == 'job_environment': env = value elif key == 'min_cpus': # Only write select clause if not in 'native_specification'. 
for arg in native_specification: if 'select' in arg: break else: script.write('%s -l select=%d:ncpus=1\n' % (prefix, value)) elif key == 'email': script.write('%s -M %s\n' % (prefix, ','.join(value))) elif key == 'email_on_started': email_events += 'b' elif key == 'email_on_terminated': email_events += 'e' elif key == 'job_name': script.write('%s -N %s\n' % (prefix, self._jobname(value))) elif key == 'input_path': inp = value elif key == 'output_path': out = value elif key == 'error_path': err = value elif key == 'join_files': join_files = value elif key == 'queue_name': script.write('%s -q %s\n' % (prefix, value)) elif key == 'priority': script.write('%s -p %d\n' % (prefix, value)) elif key == 'start_time': script.write('%s -a %s\n' % (prefix, value.strftime('%Y%m%d%H%M.%S'))) if email_events: script.write('%s -m %s\n' % (prefix, email_events)) # Set resource limits. if 'resource_limits' in resource_desc: limits = resource_desc['resource_limits'] if 'wallclock_time' in limits: wall_time = limits['wallclock_time'] script.write('%s -l walltime=%s\n' % (prefix, self._timelimit(wall_time))) # Have script move to work directory relative to # home directory on execution host. home = os.path.realpath(os.path.expanduser('~')) work = os.path.realpath(self.work_dir or os.getcwd()) if work.startswith(home): work = work[len(home)+1:] if sys.platform == 'win32': # pragma no cover script.write('cd %HOMEDRIVE%%HOMEPATH%\n') else: script.write('cd $HOME\n') else: # This can potentially cause problems... self._logger.warning('work %r not a descendant of home %r', work, home) script.write('cd %s\n' % work) script.write(self._fix_path(resource_desc['remote_command'])) if 'args' in resource_desc: for arg in resource_desc['args']: script.write(' %s' % self._fix_path(arg)) script.write(' <%s' % (inp or DEV_NULL)) script.write(' >%s' % (out or '%s.stdout' % base)) if join_files or err is None: script.write(' 2>&1') else: script.write(' 2>%s' % err) script.write('\n') if sys.platform != 'win32': os.chmod(script_name, 0700) # Add 'escape' clause. cmd.extend(native_specification) cmd.append(os.path.join('.', script_name)) self._logger.info('%r', ' '.join(cmd)) try: process = ShellProc(cmd, DEV_NULL, 'qsub.out', STDOUT, env) except Exception as exc: self._logger.error('exception creating process: %s', exc) raise self._logger.debug(' PID = %d', process.pid) return_code, error_msg = process.wait(1) self._logger.debug(' returning %s', (return_code, error_msg)) return (return_code, error_msg)
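# A condensed sketch of the directive translation performed above, for the
# Linux '#PBS' case. The walltime formatting here (H:MM:SS) is an
# assumption; the real _timelimit() helper may format differently, and
# accounting_id normally comes from the resource description or the
# server default.
def _example_pbs_header(rdesc, accounting_id='a1234'):
    """ Illustrative only: return '#PBS' header lines for `rdesc`. """
    lines = ['#!/bin/sh',
             '#PBS -W group_list=%s' % accounting_id]
    if 'min_cpus' in rdesc:
        lines.append('#PBS -l select=%d:ncpus=1' % rdesc['min_cpus'])
    if 'job_name' in rdesc:
        lines.append('#PBS -N %s' % rdesc['job_name'])
    if 'queue_name' in rdesc:
        lines.append('#PBS -q %s' % rdesc['queue_name'])
    wall = rdesc.get('resource_limits', {}).get('wallclock_time')
    if wall is not None:
        lines.append('#PBS -l walltime=%d:%02d:%02d'
                     % (wall // 3600, (wall % 3600) // 60, wall % 60))
    return lines

# For example, {'job_name': 'sweep1', 'min_cpus': 4, 'queue_name': 'debug',
# 'resource_limits': {'wallclock_time': 3600}} yields a header ending in
# '#PBS -l walltime=1:00:00'; the generated script then changes to the
# working directory and runs the command with its redirections.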
def execute_command(self, resource_desc): """ Submit command based on `resource_desc`. resource_desc: dict Description of command and required resources. The '-V' `qsub` option is always used to export the current environment to the job. This environment is first updated with any 'job_environment' data. The '-sync yes' `qsub` option is used to wait for job completion. Other job resource keys are processed as follows: ========================= ==================== Resource Key Translation ========================= ==================== job_name -N `value` ------------------------- -------------------- working_directory -wd `value` ------------------------- -------------------- parallel_environment -pe `value` `n_cpus` ------------------------- -------------------- input_path -i `value` ------------------------- -------------------- output_path -o `value` ------------------------- -------------------- error_path -e `value` ------------------------- -------------------- join_files -j yes|no ------------------------- -------------------- email -M `value` ------------------------- -------------------- block_email -m n ------------------------- -------------------- email_events -m `value` ------------------------- -------------------- start_time -a `value` ------------------------- -------------------- deadline_time Not supported ------------------------- -------------------- hard_wallclock_time_limit -l h_rt= `value` ------------------------- -------------------- soft_wallclock_time_limit -l s_rt= `value` ------------------------- -------------------- hard_run_duration_limit -l h_cpu= `value` ------------------------- -------------------- soft_run_duration_limit -l s_cpu= `value` ------------------------- -------------------- job_category Not supported ========================= ==================== Where `value` is the corresponding resource value and `n_cpus` is the value of the 'n_cpus' resource, or 1. If 'working_directory' is not specified, add ``-cwd``. If 'input_path' is not specified, add ``-i /dev/null``. If 'output_path' is not specified, add ``-o <remote_command>.stdout``. If 'error_path' is not specified, add ``-j yes``. If 'native_specification' is specified, it is added to the `qsub` command just before 'remote_command' and 'args'. Output from `qsub` itself is routed to ``qsub.out``. """ self.home_dir = os.environ['HOME'] self.work_dir = '' cmd = [self._QSUB, '-V', '-sync', 'yes'] env = None inp, out, err = None, None, None # Set working directory now, for possible path fixing. try: value = resource_desc['working_directory'] except KeyError: pass else: self.work_dir = self._fix_path(value) cmd.append('-wd') cmd.append(value) # Process description in fixed, repeatable order. 
keys = ('job_name', 'job_environment', 'parallel_environment', 'input_path', 'output_path', 'error_path', 'join_files', 'email', 'block_email', 'email_events', 'start_time', 'hard_wallclock_time_limit', 'soft_wallclock_time_limit', 'hard_run_duration_limit', 'soft_run_duration_limit') for key in keys: try: value = resource_desc[key] except KeyError: continue if key == 'job_name': cmd.append('-N') cmd.append(value) elif key == 'job_environment': env = value elif key == 'parallel_environment': n_cpus = resource_desc.get('n_cpus', 1) cmd.append('-pe') cmd.append(value) cmd.append(str(n_cpus)) elif key == 'input_path': cmd.append('-i') cmd.append(self._fix_path(value)) inp = value elif key == 'output_path': cmd.append('-o') cmd.append(self._fix_path(value)) out = value elif key == 'error_path': cmd.append('-e') cmd.append(self._fix_path(value)) err = value elif key == 'join_files': cmd.append('-j') cmd.append('yes' if value else 'no') if value: err = 'yes' elif key == 'email': cmd.append('-M') cmd.append(','.join(value)) elif key == 'block_email': if value: cmd.append('-m') cmd.append('n') elif key == 'email_events': cmd.append('-m') cmd.append(value) elif key == 'start_time': cmd.append('-a') cmd.append(value) # May need to translate elif key == 'hard_wallclock_time_limit': cmd.append('-l') cmd.append('h_rt=%s' % self._make_time(value)) elif key == 'soft_wallclock_time_limit': cmd.append('-l') cmd.append('s_rt=%s' % self._make_time(value)) elif key == 'hard_run_duration_limit': cmd.append('-l') cmd.append('h_cpu=%s' % self._make_time(value)) elif key == 'soft_run_duration_limit': cmd.append('-l') cmd.append('s_cpu=%s' % self._make_time(value)) if not self.work_dir: cmd.append('-cwd') if inp is None: cmd.append('-i') cmd.append('/dev/null') if out is None: cmd.append('-o') cmd.append('%s.stdout' % os.path.basename(resource_desc['remote_command'])) if err is None: cmd.append('-j') cmd.append('yes') if 'native_specification' in resource_desc: cmd.extend(resource_desc['native_specification']) cmd.append(self._fix_path(resource_desc['remote_command'])) if 'args' in resource_desc: for arg in resource_desc['args']: cmd.append(self._fix_path(arg)) self._logger.info('%r', ' '.join(cmd)) try: process = ShellProc(cmd, '/dev/null', 'qsub.out', STDOUT, env) except Exception as exc: self._logger.error('exception creating process: %s', exc) raise self._logger.debug(' PID = %d', process.pid) return_code, error_msg = process.wait(1) self._logger.debug(' returning %s', (return_code, error_msg)) return (return_code, error_msg)
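# Illustrative sketch of the command line this method assembles, assuming
# self._QSUB is simply 'qsub'. The defaults (-cwd, -i /dev/null,
# -o <command>.stdout, -j yes) appear because the corresponding resource
# keys are absent; 'mycode' and the preformatted h_rt value are assumptions.
def _example_sge_argv(qsub='qsub'):
    """ Illustrative only: argv for a small resource description. """
    rdesc = {'job_name': 'fit',
             'remote_command': 'mycode'}
    return [qsub, '-V', '-sync', 'yes',
            '-N', rdesc['job_name'],
            '-l', 'h_rt=1:00:00',      # from hard_wallclock_time_limit
            '-cwd',                    # no 'working_directory' given
            '-i', '/dev/null',         # no 'input_path' given
            '-o', 'mycode.stdout',     # default 'output_path'
            '-j', 'yes',               # no 'error_path' given
            rdesc['remote_command']]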
class ExternalCode(ComponentWithDerivatives):
    """
    Run an external code as a component. The component can be configured to
    run the code on a remote server; see :meth:`execute`.
    """

    PIPE = subprocess.PIPE
    STDOUT = subprocess.STDOUT

    # pylint: disable-msg=E1101
    command = List(Str, desc='The command to be executed.')
    env_vars = Dict({}, iotype='in',
                    desc='Environment variables required by the command.')
    resources = Dict({}, iotype='in',
                     desc='Resources required to run this component.')
    poll_delay = Float(0., low=0., units='s', iotype='in',
                       desc='Delay between polling for command completion.'
                            ' A value of zero will use an internally computed'
                            ' default.')
    timeout = Float(0., low=0., iotype='in', units='s',
                    desc='Maximum time to wait for command completion.'
                         ' A value of zero implies an infinite wait.')
    timed_out = Bool(False, iotype='out',
                     desc='True if the command timed out.')
    return_code = Int(0, iotype='out',
                      desc='Return code from the command.')

    def __init__(self, *args, **kwargs):
        super(ExternalCode, self).__init__(*args, **kwargs)

        self.stdin = None
        self.stdout = None
        self.stderr = "error.out"

        self._process = None
        self._server = None

    # This gets used by the remote server.
    def get_access_controller(self):  #pragma no cover
        """ Return :class:`AccessController` for this object. """
        return _AccessController()

    @rbac(('owner', 'user'))
    def set(self, path, value, index=None, src=None, force=False):
        """ Don't allow setting of 'command' by a remote client. """
        if path in ('command', 'get_access_controller') and remote_access():
            self.raise_exception('%r may not be set() remotely' % path,
                                 RuntimeError)
        return super(ExternalCode, self).set(path, value, index, src, force)

    def execute(self):
        """
        Runs the specified command.

        First removes existing output files (but not files flagged as both
        input and output). Then, if `resources` have been specified, an
        appropriate server is allocated and the command is run on that
        server. Otherwise the command is run locally.

        When running remotely, the following resources are set:

        ======================= =====================================
        Key                     Value
        ======================= =====================================
        job_name                self.get_pathname()
        ----------------------- -------------------------------------
        remote_command          self.command (first item)
        ----------------------- -------------------------------------
        args                    self.command (2nd through last items)
        ----------------------- -------------------------------------
        job_environment         self.env_vars
        ----------------------- -------------------------------------
        input_path              self.stdin
        ----------------------- -------------------------------------
        output_path             self.stdout
        ----------------------- -------------------------------------
        error_path              self.stderr (if != STDOUT)
        ----------------------- -------------------------------------
        join_files              If self.stderr == STDOUT
        ----------------------- -------------------------------------
        hard_run_duration_limit self.timeout (if non-zero)
        ======================= =====================================

        .. note::

            Input files to be sent to the remote server are defined by
            :class:`FileMetadata` entries in the `external_files` list
            with `input` True. Similarly, output files to be retrieved
            from the remote server are defined by entries with `output`
            True.

        .. warning::

            Any file **not** labelled with `binary` True will undergo
            newline translation if the local and remote machines have
            different newline representations. Newline translation will
            corrupt a file that is binary but hasn't been labelled as such.
""" self.return_code = -12345678 self.timed_out = False for metadata in self.external_files: if metadata.get('output', False) and \ not metadata.get('input', False): for path in glob.glob(metadata.path): if os.path.exists(path): os.remove(path) if not self.command: self.raise_exception('Null command line', ValueError) return_code = None error_msg = '' try: if self.resources: return_code, error_msg = self._execute_remote() else: return_code, error_msg = self._execute_local() if return_code is None: if self._stop: self.raise_exception('Run stopped', RunStopped) else: self.timed_out = True self.raise_exception('Timed out', RunInterrupted) elif return_code: if isinstance(self.stderr, str): stderrfile = open(self.stderr, 'r') error_desc = stderrfile.read() stderrfile.close() err_fragment = "\nError Output:\n%s" % error_desc else: err_fragment = error_msg self.raise_exception('return_code = %d%s' \ % (return_code, err_fragment), RuntimeError) finally: self.return_code = -999999 if return_code is None else return_code def _execute_local(self): """ Run command. """ self._logger.info('executing %s...', self.command) start_time = time.time() self._process = \ ShellProc(self.command, self.stdin, self.stdout, self.stderr, self.env_vars) self._logger.debug('PID = %d', self._process.pid) try: return_code, error_msg = \ self._process.wait(self.poll_delay, self.timeout) finally: self._process.close_files() self._process = None et = time.time() - start_time if et >= 60: #pragma no cover self._logger.info('elapsed time: %.1f sec.', et) return (return_code, error_msg) def _execute_remote(self): """ Allocate a server based on required resources, send inputs, run command, and retrieve results. """ # Allocate server. self._server, server_info = RAM.allocate(self.resources) if self._server is None: self.raise_exception('Server allocation failed :-(', RuntimeError) return_code = -88888888 error_msg = '' try: # Create resource description for command. rdesc = self.resources.copy() rdesc['job_name'] = self.get_pathname() rdesc['remote_command'] = self.command[0] if len(self.command) > 1: rdesc['args'] = self.command[1:] if self.env_vars: rdesc['job_environment'] = self.env_vars if self.stdin: rdesc['input_path'] = self.stdin if self.stdout: rdesc['output_path'] = self.stdout if self.stderr: if self.stderr == self.STDOUT: rdesc['join_files'] = True else: rdesc['error_path'] = self.stderr if self.timeout: rdesc['hard_run_duration_limit'] = self.timeout # Send inputs. patterns = [] textfiles = [] for metadata in self.external_files: if metadata.get('input', False): patterns.append(metadata.path) if not metadata.binary: textfiles.append(metadata.path) if patterns: self._send_inputs(patterns, textfiles) else: self._logger.debug('No input metadata paths') # Run command. self._logger.info('executing %s...', self.command) start_time = time.time() return_code, error_msg = \ self._server.execute_command(rdesc) et = time.time() - start_time if et >= 60: #pragma no cover self._logger.info('elapsed time: %.1f sec.', et) # Retrieve results. patterns = [] textfiles = [] for metadata in self.external_files: if metadata.get('output', False): patterns.append(metadata.path) if not metadata.binary: textfiles.append(metadata.path) if patterns: self._retrieve_results(patterns, textfiles) else: self._logger.debug('No output metadata paths') finally: RAM.release(self._server) self._server = None return (return_code, error_msg) def _send_inputs(self, patterns, textfiles): """ Sends input files matching `patterns`. 
""" self._logger.info('sending inputs...') start_time = time.time() filename = 'inputs.zip' pfiles, pbytes = pack_zipfile(patterns, filename, self._logger) try: filexfer(None, filename, self._server, filename, 'b') ufiles, ubytes = self._server.unpack_zipfile(filename, textfiles=textfiles) finally: os.remove(filename) # Difficult to force file transfer error. if ufiles != pfiles or ubytes != pbytes: #pragma no cover msg = 'Inputs xfer error: %d:%d vs. %d:%d' \ % (ufiles, ubytes, pfiles, pbytes) self.raise_exception(msg, RuntimeError) et = time.time() - start_time if et >= 60: #pragma no cover self._logger.info('elapsed time: %f sec.', et) def _retrieve_results(self, patterns, textfiles): """ Retrieves result files matching `patterns`. """ self._logger.info('retrieving results...') start_time = time.time() filename = 'outputs.zip' pfiles, pbytes = self._server.pack_zipfile(patterns, filename) filexfer(self._server, filename, None, filename, 'b') # Valid, but empty, file causes unpack_zipfile() problems. try: if os.path.getsize(filename) > 0: ufiles, ubytes = unpack_zipfile(filename, logger=self._logger, textfiles=textfiles) else: ufiles, ubytes = 0, 0 finally: os.remove(filename) # Difficult to force file transfer error. if ufiles != pfiles or ubytes != pbytes: #pragma no cover msg = 'Results xfer error: %d:%d vs. %d:%d' \ % (ufiles, ubytes, pfiles, pbytes) self.raise_exception(msg, RuntimeError) et = time.time() - start_time if et >= 60: #pragma no cover self._logger.info('elapsed time: %f sec.', et) def stop(self): """ Stop the external code. """ self._stop = True if self._process: self._process.terminate() def copy_inputs(self, inputs_dir, patterns): """ Copy inputs from `inputs_dir` that match `patterns`. inputs_dir: string Directory to copy files from. Relative paths are evaluated from the component's execution directory. patterns: list or string One or more :mod:`glob` patterns to match against. This can be useful for resetting problem state. """ self._logger.info('copying initial inputs from %s...', inputs_dir) with self.dir_context: if not os.path.exists(inputs_dir): self.raise_exception("inputs_dir '%s' does not exist" \ % inputs_dir, RuntimeError) self._copy(inputs_dir, patterns) def copy_results(self, results_dir, patterns): """ Copy files from `results_dir` that match `patterns`. results_dir: string Directory to copy files from. Relative paths are evaluated from the component's execution directory. patterns: list or string One or more :mod:`glob` patterns to match against. This can be useful for workflow debugging when the external code takes a long time to execute. """ self._logger.info('copying precomputed results from %s...', results_dir) with self.dir_context: if not os.path.exists(results_dir): self.raise_exception("results_dir '%s' does not exist" \ % results_dir, RuntimeError) self._copy(results_dir, patterns) def _copy(self, directory, patterns): """ Copy files from `directory` that match `patterns` to the current directory and ensure they are writable. directory: string Directory to copy files from. patterns: list or string One or more :mod:`glob` patterns to match against. """ if isinstance(patterns, basestring): patterns = [patterns] for pattern in patterns: pattern = os.path.join(directory, pattern) for src_path in sorted(glob.glob(pattern)): dst_path = os.path.basename(src_path) self._logger.debug(' %s', src_path) shutil.copy(src_path, dst_path) # Ensure writable. mode = os.stat(dst_path).st_mode mode |= stat.S_IWUSR os.chmod(dst_path, mode)
def start_server(authkey='PublicKey', address=None, port=0, prefix='server',
                 allowed_hosts=None, allowed_users=None, allow_shell=False,
                 allowed_types=None, timeout=None, tunnel=False,
                 resources=None, log_prefix=None):
    """
    Start an :class:`ObjServerFactory` service in a separate process
    in the current directory.

    authkey: string
        Authorization key, must be matched by clients.

    address: string
        IPv4 address, hostname, or pipe name.
        Default is the host's default IPv4 address.

    port: int
        Server port (default of 0 implies next available port).
        Note that ports below 1024 typically require special privileges.
        If port is negative, then a local pipe is used for communication.

    prefix: string
        Prefix for server config file and stdout/stderr file.

    allowed_hosts: list(string)
        Host address patterns to check against. Required if `port` >= 0.
        Ignored if `allowed_users` is specified.

    allowed_users: dict
        Dictionary of users and corresponding public keys allowed access.
        If None, *any* user may access. If empty, no user may access.
        The host portions of user strings are used for address patterns.

    allow_shell: bool
        If True, :meth:`execute_command` and :meth:`load_model` are allowed.
        Use with caution!

    allowed_types: list(string)
        Names of types which may be created. If None, then allow types
        listed by :meth:`get_available_types`. If empty, no types are
        allowed.

    timeout: int
        Seconds to wait for server to start. Note that public key
        generation can take a while. The default value of None will use
        an internally computed value based on host type (and for Windows,
        the availability of pyWin32).

    tunnel: bool
        If True, report host IP address but listen for connections from a
        local SSH tunnel.

    resources: string
        Filename for resource configuration.

    log_prefix: string
        Name used to identify remote logging messages from the server.
        Implies that the local process will be receiving the messages.

    Returns ``(server_proc, config_filename)``.
""" if timeout is None: if sys.platform == 'win32' and not HAVE_PYWIN32: #pragma no cover timeout = 120 else: timeout = 30 server_key = prefix+'.key' server_cfg = prefix+'.cfg' server_out = prefix+'.out' for path in (server_cfg, server_out): if os.path.exists(path): os.remove(path) with open(server_key, 'w') as out: out.write('%s\n' % authkey) factory_path = pkg_resources.resource_filename('openmdao.main', 'objserverfactory.py') args = ['python', factory_path, '--port', str(port), '--prefix', prefix] if address is not None: args.extend(['--address', address]) if tunnel: args.append('--tunnel') if resources is not None: args.append('--resources') args.append(resources) if allowed_users is not None: write_authorized_keys(allowed_users, 'users.allow', logging.getLogger()) args.extend(['--users', 'users.allow']) else: args.append('--allow-public') if port >= 0: if allowed_hosts is None: allowed_hosts = [socket.gethostbyname(socket.gethostname())] if allowed_hosts[0].startswith('127.') and \ '127.0.0.1' not in allowed_hosts: allowed_hosts.append('127.0.0.1') with open('hosts.allow', 'w') as out: for pattern in allowed_hosts: out.write('%s\n' % pattern) if sys.platform != 'win32' or HAVE_PYWIN32: make_private('hosts.allow') else: #pragma no cover logging.warning("Can't make hosts.allow private") if allow_shell: args.append('--allow-shell') if allowed_types is not None: with open('types.allow', 'w') as out: for typname in allowed_types: out.write('%s\n' % typname) if sys.platform != 'win32' or HAVE_PYWIN32: make_private('types.allow') else: #pragma no cover logging.warning("Can't make types.allow private") args.extend(['--types', 'types.allow']) if log_prefix is not None: log_host = socket.gethostname() log_port = logging_port(log_host, log_host) args.extend(['--log-host', log_host, '--log-port', str(log_port)]) if log_prefix: # Could be null (for default). args.extend(['--log-prefix', log_prefix]) proc = ShellProc(args, stdout=server_out, stderr=STDOUT) try: # Wait for valid server_cfg file. retry = 0 while (not os.path.exists(server_cfg)) or \ (os.path.getsize(server_cfg) == 0): return_code = proc.poll() if return_code: error_msg = proc.error_message(return_code) raise RuntimeError('Server startup failed %s' % error_msg) retry += 1 if retry < 10*timeout: time.sleep(.1) # Hard to cause a startup timeout. else: #pragma no cover proc.terminate(timeout) raise RuntimeError('Server startup timeout') return (proc, server_cfg) finally: if os.path.exists(server_key): os.remove(server_key)