Beispiel #1
0
    def test_happy_run_elastic(self):
        """Smoke-test an elastic launch; the test passes if ``_run`` does not raise."""
        elastic_args = HorovodArgs()

        # Two distinct host names are listed on purpose: with a single host
        # the test would have to provide args.nics as well.
        elastic_args.hosts = 'localhost:2,127.0.0.1:2'
        elastic_args.command = [sys.executable, '-V']
        elastic_args.np = 2
        elastic_args.min_np = 2
        elastic_args.verbose = True

        # Nothing is asserted here; absence of an exception is the success criterion.
        # TODO: call into run() when elastic supports args.run_func (#1873)
        #       so that the returned result can be asserted
        _run(elastic_args)
Beispiel #2
0
 def test_run_with_jsrun(self, mocked_run_controller):
     """Run with default ``HorovodArgs`` and expect exactly one call on the mock.

     ``mocked_run_controller`` is presumably injected by a ``mock.patch``
     decorator that is not visible here -- confirm against the full file.
     """
     _run(HorovodArgs())
     mocked_run_controller.assert_called_once()
Beispiel #3
0
    def horovod_args(self, mode, controller, run_func=None, command=None):
        """Yield prepared ``HorovodArgs`` plus an ``os.execve`` mock, then verify the captured log.

        This is a generator with a single ``yield``; it is presumably wrapped
        by ``contextlib.contextmanager`` (decorator not visible here -- confirm).
        While the caller's code runs, stdout is captured and several network
        helpers are patched so that the hosts in ``local_hosts`` count as local.
        After the caller's code finishes, the captured output is echoed to
        ``sys.stdout`` and checked line by line against the expected messages
        for the given ``mode``.

        :param mode: ``'local'``, ``'remote'`` or ``'mixed'``; selects which of
            ``localhost`` / ``127.0.0.1`` are treated as local or remote hosts.
            Any other value gets the mixed host layout and raises
            ``RuntimeError`` only after the ``yield``.
        :param controller: ``'gloo'`` or ``'mpi'``; sets ``use_gloo`` / ``use_mpi``.
        :param run_func: stored unchanged in ``hargs.run_func``.
        :param command: if truthy, wrapped into a one-element list for
            ``hargs.command``; otherwise ``hargs.command`` is ``None``.
        :return: generator yielding ``(hargs, exec)`` where ``exec`` is the mock
            that replaces ``horovod.run.mpi_run.os.execve``.
        """
        # Partition the two host names into local and remote according to mode.
        if mode == 'local':
            local_hosts = ['localhost', '127.0.0.1']
            remote_hosts = []
        elif mode == 'remote':
            local_hosts = []
            remote_hosts = ['localhost', '127.0.0.1']
        else:
            # 'mixed' and any unknown mode; unknown modes are rejected further below.
            local_hosts = ['localhost']
            remote_hosts = ['127.0.0.1']
        hosts = local_hosts.copy()
        hosts.extend(remote_hosts)

        # Remote hosts are reached through ssh, so skip the test when that is not possible.
        if remote_hosts:
            ssh_works = _check_all_hosts_ssh_successful(remote_hosts, fn_cache=None)
            if not ssh_works:
                self.skipTest('password-less ssh to {} is required for this test'
                              .format(' and '.join(remote_hosts)))

        # Four processes in total, two slots on each of the two hosts.
        hargs = HorovodArgs()
        hargs.np = 4
        hargs.hosts = ','.join(['{}:2'.format(host) for host in hosts])
        hargs.use_gloo = controller == 'gloo'
        hargs.use_mpi = controller == 'mpi'
        hargs.run_func = run_func
        hargs.command = [command] if command else None
        hargs.nics = []
        hargs.verbose = 2
        hargs.disable_cache = True

        stdout = io.StringIO()
        try:
            # The patches make the network helpers agree with local_hosts and
            # replace os.execve in mpi_run with a mock handed to the caller.
            # NOTE(review): the local name `exec` shadows the builtin of the same name.
            with capture(stdout=stdout):
                with mock.patch('horovod.run.runner.network.filter_local_addresses',
                                side_effect=lambda hosts: [host for host in hosts if host not in local_hosts]), \
                     mock.patch('horovod.run.gloo_run.network.get_local_host_addresses',
                                return_value=local_hosts), \
                     mock.patch('horovod.run.gloo_run.network.resolve_host_address',
                                side_effect=lambda host: host), \
                     mock.patch('horovod.run.mpi_run.os.execve') as exec:
                    yield hargs, exec
        finally:
            # From here on `stdout` is a list of captured lines, echoed for the test log.
            # NOTE(review): readlines() reads from the current stream position;
            # presumably capture() rewinds the buffer on exit -- confirm.
            stdout = stdout.readlines()
            print(''.join(stdout), file=sys.stdout)

        # Verify the host-discovery messages expected for the selected mode.
        if mode == 'local':
            self.assertIn('Remote host found: \n', stdout)
            self.assertIn('All hosts are local, finding the interfaces with address 127.0.0.1\n', stdout)
            self.assertEqual(1, len([line for line in stdout if line.startswith('Local interface found ')]))
        elif mode == 'mixed':
            self.assertIn('Remote host found: 127.0.0.1\n', stdout)
        elif mode == 'remote':
            self.assertIn('Remote host found: localhost 127.0.0.1\n', stdout)
        else:
            raise RuntimeError('unknown mode {}'.format(mode))

        # With at least one remote host, the ssh check and interface discovery
        # messages are expected in the captured output.
        if mode in ['mixed', 'remote']:
            self.assertIn('Checking ssh on all remote hosts.\n', stdout)
            self.assertIn('SSH was successful into all the remote hosts.\n', stdout)
            self.assertIn('Testing interfaces on all the hosts.\n', stdout)
            self.assertIn('Launched horovod server.\n', stdout)
            # Exactly two task-function launches, one of them via ssh to 127.0.0.1;
            # the captured lines are passed as the failure message of the second check.
            self.assertEqual(2, len([line for line in stdout if line.startswith('Launching horovod task function: ')]))
            self.assertEqual(1, len([line for line in stdout if line.startswith('Launching horovod task function: ssh -o PasswordAuthentication=no -o StrictHostKeyChecking=no 127.0.0.1 ')]), stdout)
            if mode == 'remote':
                # Both hosts are remote: the other launch goes via ssh to localhost.
                self.assertEqual(1, len([line for line in stdout if line.startswith('Launching horovod task function: ssh -o PasswordAuthentication=no -o StrictHostKeyChecking=no localhost ')]))
            else:
                # Mixed: the other launch must not go through ssh.
                self.assertEqual(1, len([line for line in stdout if line.startswith('Launching horovod task function: ') and not line.startswith('Launching horovod task function: ssh')]))
            self.assertIn('Attempted to launch horovod task servers.\n', stdout)
            self.assertIn('Waiting for the hosts to acknowledge.\n', stdout)
            self.assertIn('Notified all the hosts that the registration is complete.\n', stdout)
            self.assertIn('Waiting for hosts to perform host-to-host interface checking.\n', stdout)
            self.assertIn('Host-to-host interface checking successful.\n', stdout)
            self.assertIn('Interfaces on all the hosts were successfully checked.\n', stdout)
            self.assertEqual(1, len([line for line in stdout if line.startswith('Common interface found: ')]))