Example #1
0
    def do_test_run_with_controller_success(self, controller, mode, run):
        """Drive ``_run`` for a setup that is expected to succeed.

        :param controller: controller name; only ``'mpi'`` is special-cased here
        :param mode: forwarded unchanged to ``self.horovod_args``
        :param run: ``'func'`` runs the function ``fn``, ``'cmd'`` runs the
                    shell command ``true``; any other value fails the test
        """
        if run == 'func':
            command = None
            run_func = fn
        elif run == 'cmd':
            command = 'true'
            run_func = None
        else:
            self.fail('unknown run argument {}'.format(run))

        # The second yielded value is named exec_mock (not ``exec``) so that
        # the builtin is not shadowed; it is presumably a mock of the exec
        # call, since only assert_called_once / call_args are used on it.
        with self.horovod_args(mode, controller, run_func=run_func, command=command) as (hargs, exec_mock):
            if controller == 'mpi' and run == 'cmd':
                # In this combination _run returns None and the command line
                # is only captured by exec_mock, so inspect what was captured.
                self.assertIsNone(_run(hargs))
                exec_mock.assert_called_once()
                positional, _ = exec_mock.call_args
                executable, shell_args, env = positional
                self.assertEqual('/bin/sh', executable)
                self.assertEqual(3, len(shell_args))
                self.assertEqual('/bin/sh', shell_args[0])
                self.assertEqual('-c', shell_args[1])
                # Execute the captured command for real; success means exit code 0.
                exit_code = safe_shell_exec.execute(shell_args[2], env)
                self.assertEqual(0, exit_code)
            else:
                actual = _run(hargs)
                # A function run yields one (rank, np) tuple per rank,
                # a command run yields None.
                if run == 'func':
                    expected = [(rank, hargs.np) for rank in range(hargs.np)]
                else:
                    expected = None
                self.assertEqual(expected, actual)
Example #2
0
    def do_test_run_with_controller_failure(self, controller, mode, run):
        """Drive ``_run`` for a setup that is expected to fail.

        :param controller: controller name; ``'mpi'`` selects the mpirun error
                           message and the captured-command code path
        :param mode: forwarded unchanged to ``self.horovod_args``
        :param run: ``'func'`` runs a lambda calling ``fn(0)``, ``'cmd'`` runs
                    the shell command ``false``; any other value fails the test
        """
        if run == 'func':
            command = None
            run_func = lambda: fn(0)
        elif run == 'cmd':
            command = 'false'
            run_func = None
        else:
            self.fail('unknown run argument {}'.format(run))

        # Error message expected from _run; used as the ``match`` pattern below.
        if controller == 'mpi':
            exception = 'mpirun failed with exit code 1'
        else:
            exception = 'Horovod detected that one or more processes exited with non-zero status'

        # The second yielded value is named exec_mock (not ``exec``) so that
        # the builtin is not shadowed; it is presumably a mock of the exec
        # call, since only assert_called_once / call_args are used on it.
        with self.horovod_args(mode, controller=controller, run_func=run_func, command=command) as (hargs, exec_mock):
            if controller == 'mpi' and run == 'cmd':
                # In this combination _run returns None and the command line
                # is only captured by exec_mock, so inspect what was captured.
                self.assertIsNone(_run(hargs))
                exec_mock.assert_called_once()
                positional, _ = exec_mock.call_args
                executable, shell_args, env = positional
                self.assertEqual('/bin/sh', executable)
                self.assertEqual(3, len(shell_args))
                self.assertEqual('/bin/sh', shell_args[0])
                self.assertEqual('-c', shell_args[1])
                # Execute the captured command for real; failure means exit code 1.
                exit_code = safe_shell_exec.execute(shell_args[2], env)
                self.assertEqual(1, exit_code)
            else:
                with pytest.raises(RuntimeError, match=exception):
                    _run(hargs)
Example #3
0
    def test_happy_run_elastic(self):
        """Smoke test: configure a ``_HorovodArgs`` and pass it to ``_run``.

        There are no assertions; the test passes when ``_run`` does not raise.
        """
        hargs = _HorovodArgs()

        # Two different host names are required here; otherwise args.nics
        # would have to be provided as well.
        settings = (
            ('hosts', 'localhost:2,127.0.0.1:2'),
            ('command', [sys.executable, '-V']),
            ('np', 2),
            ('min_np', 2),
            ('verbose', True),
        )
        for attribute, value in settings:
            setattr(hargs, attribute, value)

        # TODO: call into run() when elastic supports args.run_func (#1873)
        #       the returned result can be asserted then
        _run(hargs)
Example #4
0
 def test_run_with_jsrun(self, mocked_run_controller):
     """Run ``_run`` on default ``_HorovodArgs`` and expect exactly one controller call.

     ``mocked_run_controller`` is presumably injected by a patch decorator
     that is not visible in this excerpt -- confirm against the full file.
     """
     _run(_HorovodArgs())
     mocked_run_controller.assert_called_once()