Example #1
0
def main():  #pragma no cover
    """
    Code which runs a host manager.
    Expects configuration data from parent on `stdin`.
    Replies with address and optionally public key.
    The environment variable ``OPENMDAO_KEEPDIRS`` can be used to avoid
    removal of the temporary directory used here.
    """
    sys.stdout = open('stdout', 'w')
    sys.stderr = open('stderr', 'w')

    #    util.log_to_stderr(logging.DEBUG)
    # Avoid root possibly masking us.
    logging.getLogger().setLevel(logging.DEBUG)

    import platform
    hostname = platform.node()
    pid = os.getpid()
    ident = '(%s:%d)' % (hostname, pid)
    print '%s main startup' % ident
    sys.stdout.flush()

    # Get data from parent over stdin.
    data = cPickle.load(sys.stdin)
    sys.stdin.close()
    print '%s data received' % ident

    authkey = data['authkey']
    allow_shell = data['allow_shell']
    allowed_users = data['allowed_users']
    print '%s using %s authentication' % (ident, keytype(authkey))
    if allowed_users is None:
        print '%s allowed_users: ANY' % ident
    else:
        print '%s allowed_users: %s' % (ident, sorted(allowed_users.keys()))
    if allow_shell:
        print '%s ALLOWING SHELL ACCESS' % ident
    sys.stdout.flush()
    log_level = data['dist_log_level']
    os.environ['OPENMDAO_KEEPDIRS'] = data['keep_dirs']

    exc = None
    server = None
    try:
        # Update HostManager registry.
        dct = data['registry']
        print '%s registry:' % ident
        for name in dct.keys():
            module = dct[name]
            print '    %s: %s' % (name, module)
            mod = __import__(module, fromlist=name)
            cls = getattr(mod, name)
            register(cls, HostManager)

        # Set some stuff.
        print '%s preparing to fork, log level %d' % (ident, log_level)
        sys.stdout.flush()
        util.get_logger().setLevel(log_level)
        forking.prepare(data)

        # Create Server for a HostManager object.
        name = '%d[%d]' % (data['index'], pid)
        logging.getLogger(name).setLevel(log_level)
        server = OpenMDAO_Server(HostManager._registry, (hostname, 0),
                                 authkey,
                                 'pickle',
                                 name=name,
                                 allowed_users=allowed_users,
                                 allowed_hosts=[data['parent_address'][0]])
    except Exception as exc:
        print '%s caught exception: %s' % (ident, exc)

    # Report server address and public key back to parent.
    print '%s connecting to parent at %s' % (ident, data['parent_address'])
    sys.stdout.flush()
    conn = connection.Client(data['parent_address'], authkey=authkey)
    if exc:
        conn.send((data['index'], None, str(exc)))
    else:
        conn.send((data['index'], server.address, server.public_key_text))
    conn.close()

    if exc:
        print '%s exiting' % ident
        sys.exit(1)

    # Set name etc.
    current_process()._server = server
    current_process()._name = 'Host-%s:%s' % server.address
    current_process().authkey = authkey
    logging.getLogger(current_process()._name).setLevel(log_level)
    util._run_after_forkers()

    # Register a cleanup function.
    def cleanup(directory):
        keep_dirs = int(os.environ.get('OPENMDAO_KEEPDIRS', '0'))
        if not keep_dirs and os.path.exists(directory):
            print '%s removing directory %s' % (ident, directory)
            shutil.rmtree(directory)
        print '%s shutting down host manager' % ident

    util.Finalize(None, cleanup, args=[data['dir']], exitpriority=0)

    # Start host manager.
    print '%s remote host manager starting in %s' % (ident, data['dir'])
    sys.stdout.flush()
    server.serve_forever()
Example #2
0
        return path

    @staticmethod
    def _jobname(name):
        """
        Return a legal job name derived from `name`.

        Surrounding whitespace is stripped, the result is cut to 15
        characters and passed through the ``_XLATE`` translation table.  A
        non-empty result whose first character is not alphabetic has that
        character replaced by ``'Z'``.
        """
        legal = name.strip()[:15].translate(_XLATE)
        if not legal or legal[0].isalpha():
            return legal
        return 'Z%s' % legal[1:]

    @staticmethod
    def _timelimit(seconds):
        """ Make time limit string from `seconds`. """
        hours = int(seconds / (60 * 60))
        seconds -= hours * 60 * 60
        minutes = int(seconds / 60)
        seconds -= minutes * 60
        seconds = int(seconds)
        return '%d:%02d:%02d' % (hours, minutes, seconds)


class _ServerManager(OpenMDAO_Manager):
    """
    :class:`multiprocessing.Manager` specialization used to manage
    :class:`PBS_Server` objects.
    """


# Associate PBS_Server with its manager class.
# NOTE(review): third argument looks like the defining module path -- confirm
# against `register`.
register(PBS_Server, _ServerManager, 'pbs.pbs')
            obj = server.create(typname, version, None, res_desc, **ctor_args)
        else:
            obj = server

        self._logger.log(LOG_DEBUG2, 'create returning %r at %r',
                         obj, obj._token.address)
        return obj


class _FactoryManager(OpenMDAO_Manager):
    """
    :class:`multiprocessing.Manager` specialization used to manage
    :class:`ObjServerFactory` objects.
    """


# Associate ObjServerFactory with its manager class.
# NOTE(review): third argument looks like the defining module path -- confirm
# against `register`.
register(ObjServerFactory, _FactoryManager, 'openmdao.main.objserverfactory')

    
class ObjServer(object):
    """
    An object which knows how to create other objects, load a model, etc.
    All remote file accesses must be within the tree rooted in the current
    directory at startup.

    name: string
        Name of server, used in log messages, etc.

    allow_shell: bool
        If True, :meth:`execute_command` and :meth:`load_model` are allowed.
        Use with caution!
            Description of required resources.

        criteria: dict
            The dictionary returned by :meth:`time_estimate`.
        """
        credentials = get_credentials()
        allowed_users = {credentials.user: credentials.public_key}
        try:
            return self.create(typname="", allowed_users=allowed_users, name=name)
        # Shouldn't happen...
        except Exception as exc:  # pragma no cover
            self._logger.error("create failed: %r", exc)
            return None


# Register LocalAllocator with both mp_distributing.Cluster and
# mp_distributing.HostManager.
# NOTE(review): presumably this lets either manager create LocalAllocator
# instances remotely -- confirm against `register`.
register(LocalAllocator, mp_distributing.Cluster)
register(LocalAllocator, mp_distributing.HostManager)


# Cluster allocation requires ssh configuration and multiple hosts.
class ClusterAllocator(object):  # pragma no cover
    """
    Cluster-based resource allocator.  This allocator manages a collection
    of :class:`LocalAllocator`, one for each machine in the cluster.

    name: string
        Name of allocator, used in log messages, etc.

    machines: list(dict)
        Dictionaries providing configuration data for each machine in the
        cluster.  At a minimum, each dictionary must specify a host
def main():  #pragma no cover
    """
    Code which runs a host manager.
    Expects configuration data from parent on `stdin`.
    Replies with address and optionally public key.
    The environment variable ``OPENMDAO_KEEPDIRS`` can be used to avoid
    removal of the temporary directory used here.
    """
    sys.stdout = open('stdout', 'w')
    sys.stderr = open('stderr', 'w')

#    util.log_to_stderr(logging.DEBUG)
    # Avoid root possibly masking us.
    logging.getLogger().setLevel(logging.DEBUG)

    pid = os.getpid()
    ident = '(%s:%d)' % (socket.gethostname(), pid)
    print '%s main startup' % ident
    sys.stdout.flush()

    # Get data from parent over stdin.
    dump = sys.stdin.read()
    sys.stdin.close()
    print '%s data received (%s)' % (ident, len(dump))
    data = cPickle.loads(base64.b64decode(dump))

    hostname = data['hostname']
    print '%s using hostname %s' % (ident, hostname)

    authkey = data['authkey']
    print '%s using %s authentication' % (ident, keytype(authkey))

    allowed_users = data['allowed_users']
    if allowed_users is None:
        print '%s allowed_users: ANY' % ident
    else:
        print '%s allowed_users: %s' % (ident, sorted(allowed_users.keys()))

    allow_shell = data['allow_shell']
    if allow_shell:
        print '%s ALLOWING SHELL ACCESS' % ident

    allow_tunneling = data['allow_tunneling']
    print '%s allow_tunneling: %s' % (ident, allow_tunneling)
    if allow_tunneling:
        hostname = 'localhost'

    sys.stdout.flush()

    log_level = data['dist_log_level']
    os.environ['OPENMDAO_KEEPDIRS'] = data['keep_dirs']

    exc = None
    server = None
    try:
        # Update HostManager registry.
        dct = data['registry']
        print '%s registry:' % ident
        for name in dct.keys():
            module = dct[name]
            print'    %s: %s' % (name, module)
            mod = __import__(module, fromlist=name)
            cls = getattr(mod, name)
            register(cls, HostManager)

        # Set some stuff.
        print '%s preparing to fork, log level %d' % (ident, log_level)
        sys.stdout.flush()
        util.get_logger().setLevel(log_level)
        forking.prepare(data)

        # Create Server for a HostManager object.
        name = '%d[%d]' % (data['index'], pid)
        logging.getLogger(name).setLevel(log_level)
        server = OpenMDAO_Server(HostManager._registry, (hostname, 0),
                                 authkey, 'pickle', name=name,
                                 allowed_users=allowed_users,
                                 allowed_hosts=[data['parent_address'][0]],
                                 allow_tunneling=allow_tunneling)
        print '%s server listening at %s' % (ident, server.address)
    except Exception as exc:
        print '%s caught exception: %s' % (ident, exc)

    # Report server address and public key back to parent.
    print '%s connecting to parent at %s' % (ident, data['parent_address'])
    sys.stdout.flush()
    for retry in range(10):
        try:
            conn = connection.Client(data['parent_address'], authkey=authkey)
        except socket.error as sock_exc:
            print '%s %s' % (ident, sock_exc)
            if retry < 9 and (sock_exc.args[0] == errno.ECONNREFUSED or \
                              sock_exc.args[0] == errno.ENOENT):
                print '%s retrying...' % ident
                time.sleep(1)
            else:
                print '%s exiting' % ident
                sys.exit(1)
        else:
            break
    if exc:
        conn.send((data['index'], None, str(exc)))
    else:
        conn.send((data['index'], server.address, server.public_key_text))
    conn.close()

    if exc:
        print '%s exiting' % ident
        sys.exit(1)

    # Set name etc.
    current_process()._server = server
    current_process()._name = 'Host-%s:%s' % server.address
    current_process().authkey = authkey
    logging.getLogger(current_process()._name).setLevel(log_level)
    util._run_after_forkers()

    # Register a cleanup function.
    def cleanup(directory):
        """ Removes our directory unless OPENMDAO_KEEPDIRS set. """
        keep_dirs = int(os.environ.get('OPENMDAO_KEEPDIRS', '0'))
        if not keep_dirs and os.path.exists(directory):
            print '%s removing directory %s' % (ident, directory)
            try:
                shutil.rmtree(directory, onerror=onerror)
            except WindowsError as exc:
                print '%s %s' % (ident, exc)
        print '%s shutting down host manager' % ident
    util.Finalize(None, cleanup, args=[data['dir']], exitpriority=0)

    # Start host manager.
    print '%s remote host manager starting in %s' % (ident, data['dir'])
    sys.stdout.flush()
    server.serve_forever()
        elif path.startswith(WORKING_DIRECTORY):
            path = os.path.join(self.work_dir, path[len(WORKING_DIRECTORY) :])
        return path

    @staticmethod
    def _jobname(name):
        """ Return `name` translated through the ``_XLATE`` table. """
        legal_name = name.translate(_XLATE)
        return legal_name

    @staticmethod
    def _timelimit(seconds):
        """ Make time string from `seconds`. """
        seconds = float(seconds)
        hours = int(seconds / (60 * 60))
        seconds -= hours * 60 * 60
        minutes = int(seconds / 60)
        seconds -= minutes * 60
        seconds = int(seconds)
        return "%d:%d:%d" % (hours, minutes, seconds)


class _ServerManager(OpenMDAO_Manager):
    """
    :class:`multiprocessing.Manager` specialization used to manage
    :class:`GridEngineServer` objects.
    """


# Associate GridEngineServer with its manager class.
# NOTE(review): third argument looks like the defining module path -- confirm
# against `register`.
register(GridEngineServer, _ServerManager, "grid_engine.grid_engine")
            path = os.path.join(self.home_dir, path[len(HOME_DIRECTORY):])
        elif path.startswith(WORKING_DIRECTORY):
            path = os.path.join(self.work_dir, path[len(WORKING_DIRECTORY):])
        return path

    @staticmethod
    def _jobname(name):
        """ Map `name` through ``_XLATE`` and return the resulting job name. """
        translated = name.translate(_XLATE)
        return translated

    @staticmethod
    def _timelimit(seconds):
        """ Make time string from `seconds`. """
        seconds = float(seconds)
        hours = int(seconds / (60 * 60))
        seconds -= hours * 60 * 60
        minutes = int(seconds / 60)
        seconds -= minutes * 60
        seconds = int(seconds)
        return '%d:%d:%d' % (hours, minutes, seconds)


class _ServerManager(OpenMDAO_Manager):
    """
    Manager (a :class:`multiprocessing.Manager`) dedicated to
    :class:`GridEngineServer`.
    """


# Tie GridEngineServer to _ServerManager.
# NOTE(review): the string appears to name the defining module -- confirm
# against `register`.
register(GridEngineServer, _ServerManager, 'grid_engine.grid_engine')
        else:
            obj = server

        self._logger.log(LOG_DEBUG2, 'create returning %r at %r', obj,
                         obj._token.address)
        return obj


class _FactoryManager(OpenMDAO_Manager):
    """
    Manager (a :class:`multiprocessing.Manager`) dedicated to
    :class:`ObjServerFactory`.
    """


# Tie ObjServerFactory to _FactoryManager.
# NOTE(review): the string appears to name the defining module -- confirm
# against `register`.
register(ObjServerFactory, _FactoryManager, 'openmdao.main.objserverfactory')


class ObjServer(object):
    """
    An object which knows how to create other objects, load a model, etc.
    All remote file accesses must be within the tree rooted in the current
    directory at startup.

    name: string
        Name of server; used in log messages, etc.

    allow_shell: bool
        If True, :meth:`execute_command` and :meth:`load_model` are allowed.
        Use with caution!
            path = os.path.join(self.home_dir, path[len(HOME_DIRECTORY):])
        elif path.startswith(WORKING_DIRECTORY):
            path = os.path.join(self.work_dir, path[len(WORKING_DIRECTORY):])
        return path

    @staticmethod
    def _jobname(name):
        """ Produce a job name by applying ``_XLATE`` to `name`. """
        job_name = name.translate(_XLATE)
        return job_name

    @staticmethod
    def _timelimit(seconds):
        """ Make time string from `seconds`. """
        seconds = float(seconds)
        hours = int(seconds / (60*60))
        seconds -= hours * 60*60
        minutes = int(seconds / 60)
        seconds -= minutes * 60
        seconds = int(seconds)
        return '%d:%d:%d' % (hours, minutes, seconds)


class _ServerManager(OpenMDAO_Manager):
    """
    The :class:`multiprocessing.Manager` responsible for
    :class:`GridEngineServer`.
    """


# Register GridEngineServer against _ServerManager.
# NOTE(review): last argument is presumably the module that defines the
# server class -- confirm against `register`.
register(GridEngineServer, _ServerManager, 'grid_engine.grid_engine')

Example #10
0
        return path

    @staticmethod
    def _jobname(name):
        """
        Create legal job name from `name`.

        Surrounding whitespace is stripped, the result is limited to 15
        characters and translated through ``_XLATE``.  If the result is
        non-empty and does not start with an alphabetic character, its first
        character is replaced by ``'Z'``.  An empty result is returned
        unchanged.
        """
        name = name.strip()[:15]  # 15 characters max.
        name = name.translate(_XLATE)
        # Guard against an empty name: indexing name[0] would raise
        # IndexError.
        if name and not name[0].isalpha():
            name = 'Z%s' % name[1:]
        return name

    @staticmethod
    def _timelimit(seconds):
        """ Make time limit string from `seconds`. """
        hours = int(seconds / (60*60))
        seconds -= hours * 60*60
        minutes = int(seconds / 60)
        seconds -= minutes * 60
        seconds = int(seconds)
        return '%d:%02d:%02d' % (hours, minutes, seconds)


class _ServerManager(OpenMDAO_Manager):
    """
    The :class:`multiprocessing.Manager` responsible for
    :class:`PBS_Server`.
    """


# Register PBS_Server against _ServerManager.
# NOTE(review): last argument is presumably the module that defines the
# server class -- confirm against `register`.
register(PBS_Server, _ServerManager, 'pbs.pbs')

Example #11
0
    def _fix_path(self, path):
        """
        Expand a special leading prefix in `path`.

        A path starting with ``HOME_DIRECTORY`` has that prefix replaced by
        ``self.home_dir``; one starting with ``WORKING_DIRECTORY`` has it
        replaced by ``self.work_dir``.  Any other path is returned unchanged.
        """
        if path.startswith(HOME_DIRECTORY):
            remainder = path[len(HOME_DIRECTORY):]
            return os.path.join(self.home_dir, remainder)
        if path.startswith(WORKING_DIRECTORY):
            remainder = path[len(WORKING_DIRECTORY):]
            return os.path.join(self.work_dir, remainder)
        return path

    @staticmethod
    def _make_time(seconds):
        """ Make time string from `seconds`. """
        seconds = float(seconds)
        hours = int(seconds / (60*60))
        seconds -= hours * 60*60
        minutes = int(seconds / 60)
        seconds -= minutes * 60
        seconds = int(seconds)
        return '%d:%d:%d' % (hours, minutes, seconds)


class _ServerManager(OpenMDAO_Manager):
    """
    :class:`multiprocessing.Manager` variant whose managed type is
    :class:`GridEngineServer`.
    """


# Hook GridEngineServer up to _ServerManager.
# NOTE(review): the dotted string is presumably the module defining the
# server class -- confirm against `register`.
register(GridEngineServer, _ServerManager,
         'openmdao.contrib.grid_engine.grid_engine')

Example #12
0
#       command=[]
#       command.extend(self.mpi_path)

#       # put together execute command from resource_desc
#       # first the execute command, probably mpirun
#       if 'hostnames' in resource_desc:
#           np = len(resource_desc['hostnames'])
#           if np > 0: 
#               self.command.extend(('-np',str(np)))
#               command.extend(('-host',str(resource_desc['hostnames'])))
#           else:
#               raise ValueError('%s: np must be > 0, got %d'
#                                % (self.name, np))
#       else:
#           raise ValueError('"hostnames" key must be specified in resource_desc')

#       if 'remote_command' in resource_desc:
#           command.extend(resource_desc['remote_command'])

#       try:
#           process = ShellProc(command, DEV_NULL, 'qsub.out', STDOUT, env)


class _ServerManager(OpenMDAO_Manager):
    """
    A :class:`multiprocessing.Manager` which manages :class:`MPI_Server`.
    """
    pass


# MPI_Server (not PBS_Server) is the class registered with this manager.
# NOTE(review): third argument looks like the defining module path -- confirm
# against `register`.
register(MPI_Server, _ServerManager, 'mpiallocator.mpiallocator')