def do_test_run_with_controller_failure(self, controller, mode, run):
    """Shared body for tests in which the launched workload fails.

    :param controller: controller name; ``'mpi'`` selects the mpirun error
        message and, combined with ``run == 'cmd'``, the command-inspection
        path below.
    :param mode: forwarded unchanged to ``self.horovod_args``.
    :param run: ``'func'`` to run a lambda that calls ``fn(0)``, or ``'cmd'``
        to run the shell command ``false``; anything else fails the test.
    """
    if run == 'func':
        command, run_func = None, (lambda: fn(0))
    elif run == 'cmd':
        command, run_func = 'false', None
    else:
        self.fail('unknown run argument {}'.format(run))

    # message the RuntimeError is expected to match
    exception = (
        'mpirun failed with exit code 1'
        if controller == 'mpi'
        else 'Horovod detected that one or more processes exited with non-zero status'
    )

    with self.horovod_args(mode, controller=controller,
                           run_func=run_func, command=command) as (hargs, exec_mock):
        if controller != 'mpi' or run != 'cmd':
            with pytest.raises(RuntimeError, match=exception):
                _run(hargs)
        else:
            # For mpi + cmd the exec callable yielded by horovod_args only
            # records its call, so _run returns None; the recorded shell
            # command is then executed for real to observe the failure.
            self.assertIsNone(_run(hargs))
            exec_mock.assert_called_once()

            positional, _ = exec_mock.call_args
            executable, argv, env = positional

            self.assertEqual('/bin/sh', executable)
            self.assertEqual(3, len(argv))
            self.assertEqual('/bin/sh', argv[0])
            self.assertEqual('-c', argv[1])

            exit_code = safe_shell_exec.execute(argv[2], env)
            self.assertEqual(1, exit_code)
def do_test_run_with_controller_success(self, controller, mode, run):
    """Shared body for tests in which the launched workload succeeds.

    :param controller: controller name; ``'mpi'`` combined with
        ``run == 'cmd'`` selects the command-inspection path below.
    :param mode: forwarded unchanged to ``self.horovod_args``.
    :param run: ``'func'`` to run ``fn``, or ``'cmd'`` to run the shell
        command ``true``; anything else fails the test.
    """
    if run == 'func':
        command, run_func = None, fn
    elif run == 'cmd':
        command, run_func = 'true', None
    else:
        self.fail('unknown run argument {}'.format(run))

    with self.horovod_args(mode, controller,
                           run_func=run_func, command=command) as (hargs, exec_mock):
        if controller != 'mpi' or run != 'cmd':
            actual = _run(hargs)
            if run == 'func':
                # one (rank, np) pair is expected per rank
                expected = [(rank, hargs.np) for rank in range(hargs.np)]
            else:
                expected = None
            self.assertEqual(expected, actual)
        else:
            # For mpi + cmd the exec callable yielded by horovod_args only
            # records its call, so _run returns None; the recorded shell
            # command is then executed for real to observe its exit code.
            self.assertIsNone(_run(hargs))
            exec_mock.assert_called_once()

            positional, _ = exec_mock.call_args
            executable, argv, env = positional

            self.assertEqual('/bin/sh', executable)
            self.assertEqual(3, len(argv))
            self.assertEqual('/bin/sh', argv[0])
            self.assertEqual('-c', argv[1])

            exit_code = safe_shell_exec.execute(argv[2], env)
            self.assertEqual(0, exit_code)
def test_happy_run_elastic(self):
    """Smoke-test ``_run`` with two host entries and ``min_np`` set.

    The command is ``<python> -V`` with ``np`` and ``min_np`` both 2. The
    test makes no assertions; it passes when ``_run`` raises nothing.
    """
    hargs = HorovodArgs()

    # two distinct host names are required here; with a single host the
    # nics argument would have to be given as well
    hargs.hosts = 'localhost:2,127.0.0.1:2'
    hargs.command = [sys.executable, '-V']
    hargs.np = 2
    hargs.min_np = 2
    hargs.verbose = True

    # Deliberately assertion-free: finishing without an exception is success.
    # TODO: call into run() when elastic supports args.run_func (#1873);
    # the returned result can be asserted then.
    _run(hargs)
def test_run_with_jsrun(self, mocked_run_controller):
    """Check that ``_run`` on default ``HorovodArgs`` calls the controller mock once.

    :param mocked_run_controller: mock object handed to this test
        (presumably injected by a patch decorator outside this view --
        TODO confirm).
    """
    _run(HorovodArgs())
    mocked_run_controller.assert_called_once()