def test_hostnames(self):
        # Checks ResourceAllocationManager.get_hostnames() with one request
        # that is expected to succeed and one that is expected to fail.
        # (Kept as comments rather than a docstring so unittest's verbose
        # test description is unaffected.)
        logging.debug('')
        logging.debug('test_hostnames')

        # Single-CPU request: the first reported hostname must equal this
        # machine's network name.
        satisfiable = {'n_cpus': 1}
        found = ResourceAllocationManager.get_hostnames(satisfiable)
        self.assertEqual(found[0], platform.node())

        # A request keyed on 'no_such_resource' (presumably unknown to every
        # allocator) must yield None rather than a list.
        unsatisfiable = {'no_such_resource': 1}
        missing = ResourceAllocationManager.get_hostnames(unsatisfiable)
        self.assertEqual(missing, None)
Beispiel #2
0
    def test_hostnames(self):
        # Verifies get_hostnames() on the resource allocation manager: a
        # one-CPU request reports this host first, and a request naming
        # 'no_such_resource' comes back as None.
        for message in ('', 'test_hostnames'):
            logging.debug(message)

        manager = ResourceAllocationManager

        # First hostname for a one-CPU request is the local node name.
        self.assertEqual(manager.get_hostnames({'n_cpus': 1})[0],
                         platform.node())

        # No hostnames at all (None) for the unrecognized resource key.
        self.assertEqual(manager.get_hostnames({'no_such_resource': 1}),
                         None)
    def test_hostnames(self):
        # Verifies RAM.get_hostnames(): a 'min_cpus' request reports the
        # local node first, while a request directed at the 'LocalHost'
        # allocator with 'localhost' set to False produces None.
        logging.debug('')
        logging.debug('test_hostnames')

        # Request at least one CPU; the first answer must be this machine.
        local_request = dict(min_cpus=1)
        reported = RAM.get_hostnames(local_request)
        self.assertEqual(reported[0], platform.node())

        # Same allocator, but with 'localhost' False: expected to be
        # rejected, so no hostnames (None) are returned.
        rejected_request = dict(allocator='LocalHost', localhost=False)
        reported = RAM.get_hostnames(rejected_request)
        self.assertEqual(reported, None)
    def test_hostnames(self):
        # Exercises RAM.get_hostnames() twice: once with a request that
        # should resolve to the local machine and once with a request that
        # should resolve to nothing.
        logging.debug('')
        logging.debug('test_hostnames')

        # The first hostname for a minimum of one CPU is the local node.
        first_host = RAM.get_hostnames({'min_cpus': 1})[0]
        self.assertEqual(first_host, platform.node())

        # Asking the 'LocalHost' allocator with 'localhost' False must give
        # back None.
        no_hosts = RAM.get_hostnames({'allocator': 'LocalHost',
                                      'localhost': False})
        self.assertEqual(no_hosts, None)
Beispiel #5
0
def main():
    """
    Configure a cluster and use it.

    Builds a list of `ClusterHost` machines (five remote hosts reached via
    ssh tunnels when `USE_EC2` is true, otherwise just the local machine),
    wraps them in a `ClusterAllocator` that is inserted at the front of the
    resource allocation manager, lists the hostnames the cluster offers,
    and finally runs a `GPOptimization` model restricted to that cluster.
    """
    enable_console(logging.DEBUG)
    # NOTSET (numeric 0) on the root logger lets every record through.
    logging.getLogger().setLevel(logging.NOTSET)
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('Client PID %s' % os.getpid())

    # Configure cluster.
    cluster_name = 'EC2Cluster'
    machines = []
    if USE_EC2:
        # The identity file used to access EC2 via ssh.
        identity_filename = os.path.expanduser('~/.ssh/lovejoykey')
        identity_filename += '.ppk' if sys.platform == 'win32' else '.pem'

        # Interpreter location on each remote host. The last two use a
        # 'Scripts' directory instead of 'bin' (presumably Windows hosts).
        remote_pythons = (
            'setowns1_2013-05-06_09.17.04.529682'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python',
            'setowns1_2013-05-06_09.17.03.113077'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python',
            'setowns1_2013-05-06_09.17.05.434412'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python',
            'setowns1_2013-05-06_09.20.17.379627'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/Scripts/python',
            'setowns1_2013-05-06_09.19.49.348885'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/Scripts/python',
        )
        for remote_python in remote_pythons:
            machines.append(ClusterHost(
                hostname='*****@*****.**',
                python=remote_python,
                tunnel_incoming=True, tunnel_outgoing=True,
                identity_filename=identity_filename))
    else:
        # Trivial local 'cluster' for debugging without remote host issues.
        machines.append(
            ClusterHost(hostname=socket.getfqdn(), python=sys.executable))

    # Disabled extra host, kept for reference (usable in either branch):
    #     machines.append(ClusterHost(
    #         hostname='viper.grc.nasa.gov',
    #         python='OpenMDAO-Framework/devenv/bin/python',
    #         tunnel_incoming=True, tunnel_outgoing=True,
    #         identity_filename=None))

    # Start it.
    # Alternative `method` values left disabled: 'greedy', 'round-robin'.
    cluster = ClusterAllocator(cluster_name,
                               machines,
                               allow_shell=True,
                               method='load-average')
    print('Cluster initialized')
    RAM.insert_allocator(0, cluster)

    n_servers = RAM.max_servers(dict(allocator=cluster_name))
    print('%s Servers:' % (n_servers,))
    hostnames = RAM.get_hostnames(
        dict(allocator=cluster_name, min_cpus=n_servers))
    # get_hostnames() returns None when the request cannot be satisfied;
    # treat that as an empty listing instead of failing with a TypeError.
    for name in hostnames or ():
        print('    %s' % (name,))

    # Create model.
    top = GPOptimization()

    # Configure DOE.
    top.driver.sequential = False  # Run concurrently across cluster.
    top.driver.reload_model = False
    # Force use of only cluster hosts by adding this requirement.
    top.driver.extra_resources = dict(allocator=cluster_name)
    # This is necessary more often than it should be.
    top.driver.ignore_egg_requirements = True

    # Perform the optimization.
    top.run()
def main():
    """
    Configure a cluster and use it.

    The cluster consists of five ssh-tunnelled remote hosts when `USE_EC2`
    is true, or only the local machine otherwise. It is registered as the
    first allocator of the resource allocation manager, the hostnames it
    can provide are printed, and a `GPOptimization` model is then run with
    its driver restricted to this cluster.
    """
    enable_console(logging.DEBUG)
    # Root logger at NOTSET (numeric 0): no records are filtered by level.
    logging.getLogger().setLevel(logging.NOTSET)
    # print(...) with one pre-formatted argument behaves identically on
    # Python 2 and Python 3.
    print('Client PID %s' % os.getpid())

    # Configure cluster.
    cluster_name = 'EC2Cluster'
    machines = []
    if USE_EC2:
        # The identity file used to access EC2 via ssh.
        identity_filename = os.path.expanduser('~/.ssh/lovejoykey')
        identity_filename += '.ppk' if sys.platform == 'win32' else '.pem'

        # One entry per remote host: the Python interpreter to use there.
        # The 'Scripts' entries presumably belong to Windows hosts.
        interpreter_paths = [
            'setowns1_2013-05-06_09.17.04.529682'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python',
            'setowns1_2013-05-06_09.17.03.113077'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python',
            'setowns1_2013-05-06_09.17.05.434412'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python',
            'setowns1_2013-05-06_09.20.17.379627'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/Scripts/python',
            'setowns1_2013-05-06_09.19.49.348885'
            '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/Scripts/python',
        ]
        for interpreter in interpreter_paths:
            machines.append(ClusterHost(
                hostname='*****@*****.**',
                python=interpreter,
                tunnel_incoming=True, tunnel_outgoing=True,
                identity_filename=identity_filename))
    else:
        # Trivial local 'cluster' for debugging without remote host issues.
        machines.append(ClusterHost(hostname=socket.getfqdn(),
                                    python=sys.executable))

    # Optional extra host, currently disabled in both configurations:
    #     machines.append(ClusterHost(
    #         hostname='viper.grc.nasa.gov',
    #         python='OpenMDAO-Framework/devenv/bin/python',
    #         tunnel_incoming=True, tunnel_outgoing=True,
    #         identity_filename=None))

    # Start it.
    # Other `method` choices left disabled here: 'greedy', 'round-robin'.
    cluster = ClusterAllocator(cluster_name, machines, allow_shell=True,
                               method='load-average')
    print('Cluster initialized')
    RAM.insert_allocator(0, cluster)

    n_servers = RAM.max_servers(dict(allocator=cluster_name))
    print('%s Servers:' % (n_servers,))
    available = RAM.get_hostnames(dict(allocator=cluster_name,
                                       min_cpus=n_servers))
    # A request that cannot be met yields None rather than a list, so only
    # iterate when hostnames were actually returned.
    if available:
        for name in available:
            print('    %s' % (name,))

    # Create model.
    top = GPOptimization()

    # Configure DOE.
    top.driver.sequential = False   # Run concurrently across cluster.
    top.driver.reload_model = False
    # Force use of only cluster hosts by adding this requirement.
    top.driver.extra_resources = dict(allocator=cluster_name)
    # This is necessary more often than it should be.
    top.driver.ignore_egg_requirements = True

    # Perform the optimization.
    top.run()
Beispiel #7
0
    def _run_parallel(self, busy_hosts):
        """
        Run the MPI-distributed version of ADPAC.

        Hostnames are requested from the resource allocators and written to
        a machinefile that is handed to the MPI launcher. `busy_hosts` is a
        list of hosts to leave out of the request; the hosts chosen here
        are appended to it, which lets a caller retry while skipping the
        hosts a previous attempt failed with.
        """
        # TODO: get correct number of blocks (nbld isn't necessarily correct)
        n_cpus = self.mpi_procs if self.mpi_procs else len(self.input.nbld)

        request = dict(n_cpus=n_cpus, exclude=busy_hosts)
        hostnames = RAM.get_hostnames(request)
        if not hostnames:
            self.raise_exception('No hosts!', RuntimeError)
        busy_hosts.extend(hostnames)

        # One hostname per line, in the order the allocator returned them.
        machinefile = 'machines'
        with open(machinefile, 'w') as out:
            out.writelines('%s\n' % host for host in hostnames)

        # Launcher first, then process count and machinefile.
        self.command = [self.mpi_path]
        for option, value in (('-np', str(n_cpus)),
                              ('-machinefile', machinefile)):
            self.command.extend([option, value])

        if os.path.sep in self.mpi_adpac:
            # Already contains a directory component: use it unchanged.
            self.command.append(self.mpi_adpac)
        else:
            # Some mpirun commands want a real path.
            search_dirs = os.environ['PATH'].split(os.path.pathsep)
            candidates = (os.path.join(folder, self.mpi_adpac)
                          for folder in search_dirs)
            located = next((candidate for candidate in candidates
                            if os.path.exists(candidate)), None)
            if located is None:
                self.raise_exception("Can't find %r on PATH" % self.mpi_adpac,
                                     RuntimeError)
            else:
                self.command.append(located)

        if self.stats:
            self.command.extend(['-s', 'all'])
        self.command.append('-Z')

        # Single-letter switches; '-c' and '-d' are emitted when their
        # corresponding settings are false, the others when true.
        switches = (
            (self.iasync, '-a'),
            (self.ibalance, '-b'),
            (not self.icheck, '-c'),
            (not self.idissf, '-d'),
            (self.irevs, '-r'),
        )
        for wanted, switch in switches:
            if wanted:
                self.command.append(switch)

        casename = self.input.casename
        self.command.extend(['-i', casename+'.input'])
        self.command.extend(['-o', casename+'.output'])

        self.stdout = casename+'.log'
        self.stderr = ExternalCode.STDOUT
        self.resources = {}  # MPI will do distribution.
        super(ADPAC, self).execute()

        # On some systems (like GX with a shared filesystem between
        # front-end and compute nodes) it can take a bit before the
        # output files 'materialize'. Check up to 30 times, pausing one
        # second after each unsuccessful check.
        expected = (self.input.casename+'.log',
                    self.input.casename+'.output')
        for _attempt in range(30):
            if all(os.path.exists(filename) for filename in expected):
                break
            time.sleep(1)
        else:
            self.raise_exception('timeout waiting for output files',
                                 RuntimeError)