def main(): #pragma no cover """ Code which runs a host manager. Expects configuration data from parent on `stdin`. Replies with address and optionally public key. The environment variable ``OPENMDAO_KEEPDIRS`` can be used to avoid removal of the temporary directory used here. """ sys.stdout = open('stdout', 'w') sys.stderr = open('stderr', 'w') # util.log_to_stderr(logging.DEBUG) # Avoid root possibly masking us. logging.getLogger().setLevel(logging.DEBUG) import platform hostname = platform.node() pid = os.getpid() ident = '(%s:%d)' % (hostname, pid) print '%s main startup' % ident sys.stdout.flush() # Get data from parent over stdin. data = cPickle.load(sys.stdin) sys.stdin.close() print '%s data received' % ident authkey = data['authkey'] allow_shell = data['allow_shell'] allowed_users = data['allowed_users'] print '%s using %s authentication' % (ident, keytype(authkey)) if allowed_users is None: print '%s allowed_users: ANY' % ident else: print '%s allowed_users: %s' % (ident, sorted(allowed_users.keys())) if allow_shell: print '%s ALLOWING SHELL ACCESS' % ident sys.stdout.flush() log_level = data['dist_log_level'] os.environ['OPENMDAO_KEEPDIRS'] = data['keep_dirs'] exc = None server = None try: # Update HostManager registry. dct = data['registry'] print '%s registry:' % ident for name in dct.keys(): module = dct[name] print ' %s: %s' % (name, module) mod = __import__(module, fromlist=name) cls = getattr(mod, name) register(cls, HostManager) # Set some stuff. print '%s preparing to fork, log level %d' % (ident, log_level) sys.stdout.flush() util.get_logger().setLevel(log_level) forking.prepare(data) # Create Server for a HostManager object. name = '%d[%d]' % (data['index'], pid) logging.getLogger(name).setLevel(log_level) server = OpenMDAO_Server(HostManager._registry, (hostname, 0), authkey, 'pickle', name=name, allowed_users=allowed_users, allowed_hosts=[data['parent_address'][0]]) except Exception as exc: print '%s caught exception: %s' % (ident, exc) # Report server address and public key back to parent. print '%s connecting to parent at %s' % (ident, data['parent_address']) sys.stdout.flush() conn = connection.Client(data['parent_address'], authkey=authkey) if exc: conn.send((data['index'], None, str(exc))) else: conn.send((data['index'], server.address, server.public_key_text)) conn.close() if exc: print '%s exiting' % ident sys.exit(1) # Set name etc. current_process()._server = server current_process()._name = 'Host-%s:%s' % server.address current_process().authkey = authkey logging.getLogger(current_process()._name).setLevel(log_level) util._run_after_forkers() # Register a cleanup function. def cleanup(directory): keep_dirs = int(os.environ.get('OPENMDAO_KEEPDIRS', '0')) if not keep_dirs and os.path.exists(directory): print '%s removing directory %s' % (ident, directory) shutil.rmtree(directory) print '%s shutting down host manager' % ident util.Finalize(None, cleanup, args=[data['dir']], exitpriority=0) # Start host manager. print '%s remote host manager starting in %s' % (ident, data['dir']) sys.stdout.flush() server.serve_forever()
return path @staticmethod def _jobname(name): """ Create legal job name from `name`. """ name = name.strip()[:15] # 15 characters max. name = name.translate(_XLATE) if name and not name[0].isalpha(): name = 'Z%s' % name[1:] return name @staticmethod def _timelimit(seconds): """ Make time limit string from `seconds`. """ hours = int(seconds / (60 * 60)) seconds -= hours * 60 * 60 minutes = int(seconds / 60) seconds -= minutes * 60 seconds = int(seconds) return '%d:%02d:%02d' % (hours, minutes, seconds) class _ServerManager(OpenMDAO_Manager): """ A :class:`multiprocessing.Manager` which manages :class:`PBS_Server`. """ pass register(PBS_Server, _ServerManager, 'pbs.pbs')
obj = server.create(typname, version, None, res_desc, **ctor_args) else: obj = server self._logger.log(LOG_DEBUG2, 'create returning %r at %r', obj, obj._token.address) return obj class _FactoryManager(OpenMDAO_Manager): """ A :class:`multiprocessing.Manager` which manages :class:`ObjServerFactory`. """ pass register(ObjServerFactory, _FactoryManager, 'openmdao.main.objserverfactory') class ObjServer(object): """ An object which knows how to create other objects, load a model, etc. All remote file accesses must be within the tree rooted in the current directory at startup. name: string Name of server, used in log messages, etc. allow_shell: bool If True, :meth:`execute_command` and :meth:`load_model` are allowed. Use with caution!
Description of required resources. criteria: dict The dictionary returned by :meth:`time_estimate`. """ credentials = get_credentials() allowed_users = {credentials.user: credentials.public_key} try: return self.create(typname="", allowed_users=allowed_users, name=name) # Shouldn't happen... except Exception as exc: # pragma no cover self._logger.error("create failed: %r", exc) return None register(LocalAllocator, mp_distributing.Cluster) register(LocalAllocator, mp_distributing.HostManager) # Cluster allocation requires ssh configuration and multiple hosts. class ClusterAllocator(object): # pragma no cover """ Cluster-based resource allocator. This allocator manages a collection of :class:`LocalAllocator`, one for each machine in the cluster. name: string Name of allocator, used in log messages, etc. machines: list(dict) Dictionaries providing configuration data for each machine in the cluster. At a minimum, each dictionary must specify a host
def main(): #pragma no cover """ Code which runs a host manager. Expects configuration data from parent on `stdin`. Replies with address and optionally public key. The environment variable ``OPENMDAO_KEEPDIRS`` can be used to avoid removal of the temporary directory used here. """ sys.stdout = open('stdout', 'w') sys.stderr = open('stderr', 'w') # util.log_to_stderr(logging.DEBUG) # Avoid root possibly masking us. logging.getLogger().setLevel(logging.DEBUG) pid = os.getpid() ident = '(%s:%d)' % (socket.gethostname(), pid) print '%s main startup' % ident sys.stdout.flush() # Get data from parent over stdin. dump = sys.stdin.read() sys.stdin.close() print '%s data received (%s)' % (ident, len(dump)) data = cPickle.loads(base64.b64decode(dump)) hostname = data['hostname'] print '%s using hostname %s' % (ident, hostname) authkey = data['authkey'] print '%s using %s authentication' % (ident, keytype(authkey)) allowed_users = data['allowed_users'] if allowed_users is None: print '%s allowed_users: ANY' % ident else: print '%s allowed_users: %s' % (ident, sorted(allowed_users.keys())) allow_shell = data['allow_shell'] if allow_shell: print '%s ALLOWING SHELL ACCESS' % ident allow_tunneling = data['allow_tunneling'] print '%s allow_tunneling: %s' % (ident, allow_tunneling) if allow_tunneling: hostname = 'localhost' sys.stdout.flush() log_level = data['dist_log_level'] os.environ['OPENMDAO_KEEPDIRS'] = data['keep_dirs'] exc = None server = None try: # Update HostManager registry. dct = data['registry'] print '%s registry:' % ident for name in dct.keys(): module = dct[name] print' %s: %s' % (name, module) mod = __import__(module, fromlist=name) cls = getattr(mod, name) register(cls, HostManager) # Set some stuff. print '%s preparing to fork, log level %d' % (ident, log_level) sys.stdout.flush() util.get_logger().setLevel(log_level) forking.prepare(data) # Create Server for a HostManager object. name = '%d[%d]' % (data['index'], pid) logging.getLogger(name).setLevel(log_level) server = OpenMDAO_Server(HostManager._registry, (hostname, 0), authkey, 'pickle', name=name, allowed_users=allowed_users, allowed_hosts=[data['parent_address'][0]], allow_tunneling=allow_tunneling) print '%s server listening at %s' % (ident, server.address) except Exception as exc: print '%s caught exception: %s' % (ident, exc) # Report server address and public key back to parent. print '%s connecting to parent at %s' % (ident, data['parent_address']) sys.stdout.flush() for retry in range(10): try: conn = connection.Client(data['parent_address'], authkey=authkey) except socket.error as sock_exc: print '%s %s' % (ident, sock_exc) if retry < 9 and (sock_exc.args[0] == errno.ECONNREFUSED or \ sock_exc.args[0] == errno.ENOENT): print '%s retrying...' % ident time.sleep(1) else: print '%s exiting' % ident sys.exit(1) else: break if exc: conn.send((data['index'], None, str(exc))) else: conn.send((data['index'], server.address, server.public_key_text)) conn.close() if exc: print '%s exiting' % ident sys.exit(1) # Set name etc. current_process()._server = server current_process()._name = 'Host-%s:%s' % server.address current_process().authkey = authkey logging.getLogger(current_process()._name).setLevel(log_level) util._run_after_forkers() # Register a cleanup function. def cleanup(directory): """ Removes our directory unless OPENMDAO_KEEPDIRS set. """ keep_dirs = int(os.environ.get('OPENMDAO_KEEPDIRS', '0')) if not keep_dirs and os.path.exists(directory): print '%s removing directory %s' % (ident, directory) try: shutil.rmtree(directory, onerror=onerror) except WindowsError as exc: print '%s %s' % (ident, exc) print '%s shutting down host manager' % ident util.Finalize(None, cleanup, args=[data['dir']], exitpriority=0) # Start host manager. print '%s remote host manager starting in %s' % (ident, data['dir']) sys.stdout.flush() server.serve_forever()
elif path.startswith(WORKING_DIRECTORY): path = os.path.join(self.work_dir, path[len(WORKING_DIRECTORY) :]) return path @staticmethod def _jobname(name): """ Create legal job name from `name`. """ return name.translate(_XLATE) @staticmethod def _timelimit(seconds): """ Make time string from `seconds`. """ seconds = float(seconds) hours = int(seconds / (60 * 60)) seconds -= hours * 60 * 60 minutes = int(seconds / 60) seconds -= minutes * 60 seconds = int(seconds) return "%d:%d:%d" % (hours, minutes, seconds) class _ServerManager(OpenMDAO_Manager): """ A :class:`multiprocessing.Manager` which manages :class:`GridEngineServer`. """ pass register(GridEngineServer, _ServerManager, "grid_engine.grid_engine")
path = os.path.join(self.home_dir, path[len(HOME_DIRECTORY):]) elif path.startswith(WORKING_DIRECTORY): path = os.path.join(self.work_dir, path[len(WORKING_DIRECTORY):]) return path @staticmethod def _jobname(name): """ Create legal job name from `name`. """ return name.translate(_XLATE) @staticmethod def _timelimit(seconds): """ Make time string from `seconds`. """ seconds = float(seconds) hours = int(seconds / (60 * 60)) seconds -= hours * 60 * 60 minutes = int(seconds / 60) seconds -= minutes * 60 seconds = int(seconds) return '%d:%d:%d' % (hours, minutes, seconds) class _ServerManager(OpenMDAO_Manager): """ A :class:`multiprocessing.Manager` which manages :class:`GridEngineServer`. """ pass register(GridEngineServer, _ServerManager, 'grid_engine.grid_engine')
else: obj = server self._logger.log(LOG_DEBUG2, 'create returning %r at %r', obj, obj._token.address) return obj class _FactoryManager(OpenMDAO_Manager): """ A :class:`multiprocessing.Manager` which manages :class:`ObjServerFactory`. """ pass register(ObjServerFactory, _FactoryManager, 'openmdao.main.objserverfactory') class ObjServer(object): """ An object which knows how to create other objects, load a model, etc. All remote file accesses must be within the tree rooted in the current directory at startup. name: string Name of server; used in log messages, etc. allow_shell: bool If True, :meth:`execute_command` and :meth:`load_model` are allowed. Use with caution!
path = os.path.join(self.home_dir, path[len(HOME_DIRECTORY):]) elif path.startswith(WORKING_DIRECTORY): path = os.path.join(self.work_dir, path[len(WORKING_DIRECTORY):]) return path @staticmethod def _jobname(name): """ Create legal job name from `name`. """ return name.translate(_XLATE) @staticmethod def _timelimit(seconds): """ Make time string from `seconds`. """ seconds = float(seconds) hours = int(seconds / (60*60)) seconds -= hours * 60*60 minutes = int(seconds / 60) seconds -= minutes * 60 seconds = int(seconds) return '%d:%d:%d' % (hours, minutes, seconds) class _ServerManager(OpenMDAO_Manager): """ A :class:`multiprocessing.Manager` which manages :class:`GridEngineServer`. """ pass register(GridEngineServer, _ServerManager, 'grid_engine.grid_engine')
return path @staticmethod def _jobname(name): """ Create legal job name from `name`. """ name = name.strip()[:15] # 15 characters max. name = name.translate(_XLATE) if not name[0].isalpha(): name = 'Z%s' % name[1:] return name @staticmethod def _timelimit(seconds): """ Make time limit string from `seconds`. """ hours = int(seconds / (60*60)) seconds -= hours * 60*60 minutes = int(seconds / 60) seconds -= minutes * 60 seconds = int(seconds) return '%d:%02d:%02d' % (hours, minutes, seconds) class _ServerManager(OpenMDAO_Manager): """ A :class:`multiprocessing.Manager` which manages :class:`PBS_Server`. """ pass register(PBS_Server, _ServerManager, 'pbs.pbs')
def _fix_path(self, path): """ Translates special prefixes. """ if path.startswith(HOME_DIRECTORY): path = os.path.join(self.home_dir, path[len(HOME_DIRECTORY):]) elif path.startswith(WORKING_DIRECTORY): path = os.path.join(self.work_dir, path[len(WORKING_DIRECTORY):]) return path @staticmethod def _make_time(seconds): """ Make time string from `seconds`. """ seconds = float(seconds) hours = int(seconds / (60*60)) seconds -= hours * 60*60 minutes = int(seconds / 60) seconds -= minutes * 60 seconds = int(seconds) return '%d:%d:%d' % (hours, minutes, seconds) class _ServerManager(OpenMDAO_Manager): """ A :class:`multiprocessing.Manager` which manages :class:`GridEngineServer`. """ pass register(GridEngineServer, _ServerManager, 'openmdao.contrib.grid_engine.grid_engine')
# command=[] # command.extend(self.mpi_path) # # put together execute command from resource_desc # # first the execute command, probably mpirun # if 'hostnames' in resource_desc: # np = len(resource_desc['hostnames']) # if np > 0: # self.command.extend(('-np',str(np))) # command.extend(('-host',str(resource_desc['hostnames']))) # else: # raise ValueError('%s: np must be > 0, got %d' # % (self.name, np)) # else: # raise ValueError('"hostnames" key must be specified in resource_desc') # if 'remote_command' in resource_desc: # command.extend(resource_desc['remote_command']) # try: # process = ShellProc(command, DEV_NULL, 'qsub.out', STDOUT, env) class _ServerManager(OpenMDAO_Manager): """ A :class:`multiprocessing.Manager` which manages :class:`PBS_Server`. """ pass register(MPI_Server, _ServerManager, 'mpiallocator.mpiallocator')