def test_mpi_run_full(self):
    """Check the command line and env handed to the shell executor by mpi_run.

    Every setting that shows up in the expected mpirun command (ssh port,
    binding args, extra MPI args, hosts, output filename, NICs, env) is given
    a recognisable placeholder value so the assembled command can be compared
    verbatim.
    """
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd', 'arg1', 'arg2']
    interfaces = ['eth0', 'eth1']
    run_env = {'env1': 'val1', 'env2': 'val2'}
    out_stream = '<stdout>'
    err_stream = '<stderr>'
    start_timeout = timeout.Timeout(5, message='Timed out waiting for something.')
    full_settings = hvd_settings.Settings(
        verbose=0,
        ssh_port=1022,
        extra_mpi_args='>mpi-extra args go here<',
        binding_args='>binding args go here<',
        key=secret.make_secret_key(),
        start_timeout=start_timeout,
        num_hosts=1,
        num_proc=1,
        hosts='>host names go here<',
        output_filename='>output filename goes here<',
        run_func_mode=True)

    def fake_impl_flags(tcp, env=None):
        # Stand-in for _get_mpi_implementation_flags: fixed flags, no binding args.
        return ["--mock-mpi-impl-flags"], []

    with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                    side_effect=fake_impl_flags) as impl, \
            mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                       return_value=0) as execute:
        mpi_run(full_settings, interfaces, run_env, command,
                stdout=out_stream, stderr=err_stream)

        # mpi_run must have asked for the implementation flags exactly once.
        impl.assert_called_once_with(None, env=run_env)

        # Query the patched function directly to learn the flags the command
        # is expected to contain.
        flags, _ = horovod.run.mpi_run._get_mpi_implementation_flags(False)
        self.assertIsNotNone(flags)

        # Space-separated pieces of the command, in the order they must appear.
        expected_command = ' '.join([
            'mpirun',
            '--allow-run-as-root --tag-output',
            '-np 1 -H >host names go here<',
            '>binding args go here<',
            ' '.join(flags),
            '-mca plm_rsh_args "-p 1022"',
            '-mca btl_tcp_if_include eth0,eth1 -x NCCL_SOCKET_IFNAME=eth0,eth1',
            '--output-filename >output filename goes here<',
            '-x env1 -x env2',
            '>mpi-extra args go here<',
            'cmd arg1 arg2',
        ])
        expected_env = {
            'env1': 'val1',
            'env2': 'val2',
            'PATH': os.environ.get('PATH'),
        }
        execute.assert_called_once_with(expected_command,
                                        env=expected_env,
                                        stdout=out_stream,
                                        stderr=err_stream)
def test_mpi_run_minimal(self):
    """Check the mpirun command built by mpi_run from the minimal settings.

    The implementation-flags lookup and the shell executor are both patched,
    so no process is started; the test only inspects what would have been
    executed.
    """
    if not mpi_available():
        self.skipTest("MPI is not available")

    cmd = ['cmd']
    settings = self.minimal_settings

    def mpi_impl_flags(tcp, env=None):
        # env is optional so this stub can be invoked with or without an
        # env keyword; a stub that only takes `tcp` raises TypeError as soon
        # as the patched function is called with env=...
        return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

    with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                    side_effect=mpi_impl_flags):
        with mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=0) as execute:
            mpi_run(settings, None, {}, cmd)

            # call the mocked _get_mpi_implementation_flags method to obtain
            # the flags and binding args the command is expected to contain
            mpi_flags, binding_args = horovod.run.mpi_run._get_mpi_implementation_flags(
                False)
            self.assertIsNotNone(mpi_flags)

            expected_cmd = ('mpirun '
                            '--allow-run-as-root --tag-output '
                            '-np 2 -H host '
                            '{binding_args} '
                            '{mpi_flags} '
                            'cmd').format(
                                binding_args=' '.join(binding_args),
                                mpi_flags=' '.join(mpi_flags))
            expected_env = {'PATH': os.environ.get('PATH')}
            execute.assert_called_once_with(expected_cmd,
                                            env=expected_env,
                                            stdout=None,
                                            stderr=None)
def test_run_failure(self, controller, mode, run):
    """Run the controller-failure scenario unless the controller is unusable.

    Skips when Gloo is requested but not built, when MPI is requested but not
    built or not available, and when the MPI in use is MPICH.
    """
    if controller == 'gloo':
        if not gloo_built():
            self.skipTest("Gloo is not available")
    elif controller == 'mpi':
        if not mpi_built() or not mpi_available():
            self.skipTest("MPI is not available")
        if is_mpich():
            self.skipTest("MPICH is not testable")

    self.do_test_run_with_controller_failure(controller, mode, run)
def test_mpi_run_with_non_zero_exit(self):
    """Check that mpi_run raises RuntimeError when the executor returns 1.

    The shell executor is patched to return exit code 1 and the error message
    is matched exactly.
    """
    if not mpi_available():
        self.skipTest("MPI is not available")

    cmd = ['cmd']
    settings = self.minimal_settings

    def mpi_impl_flags(tcp, env=None):
        # env is optional so this stub can be invoked with or without an
        # env keyword; otherwise a TypeError from the stub would surface
        # instead of the RuntimeError this test is about.
        return [], []

    with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                    side_effect=mpi_impl_flags):
        with mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=1):
            with pytest.raises(RuntimeError,
                               match="^mpirun failed with exit code 1$"):
                mpi_run(settings, None, {}, cmd)
def test_mpi_run_on_large_cluster(self):
    """Check the extra rsh flags added once num_hosts reaches the threshold.

    Uses a copy of the minimal settings with num_hosts set to
    large_cluster_threshold, and expects the two `-mca plm_rsh_*` options to
    follow the implementation flags in the command.
    """
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd']
    big_settings = copy.copy(self.minimal_settings)
    big_settings.num_hosts = large_cluster_threshold

    def fake_impl_flags(tcp, env=None):
        # Stand-in for _get_mpi_implementation_flags.
        return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

    with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                    side_effect=fake_impl_flags), \
            mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                       return_value=0) as execute:
        mpi_run(big_settings, None, {}, command)

        # Ask the patched function for the flags, then add what a large
        # cluster is expected to contribute.
        flags, binding = horovod.run.mpi_run._get_mpi_implementation_flags(False)
        self.assertIsNotNone(flags)
        flags.extend([
            '-mca plm_rsh_no_tree_spawn true',
            '-mca plm_rsh_num_concurrent {}'.format(big_settings.num_hosts),
        ])

        expected_cmd = ' '.join([
            'mpirun',
            '--allow-run-as-root --tag-output',
            '-np 2 -H localhost:2',
            ' '.join(binding),
            ' '.join(flags),
            'cmd',
        ])

        # PYTHONPATH's exact value is not known here, so it is taken out of
        # the recorded env and checked separately before the exact comparison.
        self.assertIn('env', execute.call_args.kwargs)
        recorded_env = execute.call_args.kwargs['env']
        self.assertIn('PYTHONPATH', recorded_env)
        recorded_python_path = recorded_env.pop('PYTHONPATH')
        self.assertIn(recorded_python_path, os.pathsep.join(sys.path))

        expected_env = {'PATH': os.environ.get('PATH')}
        execute.assert_called_once_with(expected_cmd,
                                        env=expected_env,
                                        stdout=None,
                                        stderr=None)
def do_test_mpi_run_env_override(self, sysenv, argenv, env_var, expected):
    """Run mpi_run under an overridden environment and check one env variable.

    :param sysenv: passed to override_env for the duration of the mpi_run call
    :param argenv: env argument given to mpi_run
    :param env_var: name of the variable to look up in the env handed to the
                    patched executor
    :param expected: value that variable must have (compared against `.get`,
                     so None if it is expected to be absent)
    """
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd']
    base_settings = self.minimal_settings

    def fake_impl_flags(tcp, env=None):
        # Stand-in for _get_mpi_implementation_flags.
        return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

    with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                    side_effect=fake_impl_flags):
        with mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=0) as execute:
            with override_env(sysenv):
                mpi_run(base_settings, None, argenv, command)

                # Inspect the env keyword recorded by the patched executor.
                call_kwargs = execute.call_args.kwargs
                self.assertIn('env', call_kwargs)
                self.assertEqual(call_kwargs['env'].get(env_var), expected)
def test_mpi_run_with_os_environ(self):
    """Check that passing os.environ itself as env makes mpi_run raise.

    The expected message prefix states that env must be a dict rather than
    an os._Environ instance.
    """
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd']
    base_settings = self.minimal_settings

    def fake_impl_flags(tcp, env=None):
        # Stand-in for _get_mpi_implementation_flags.
        return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

    expected_message = "^env argument must be a dict, not <class 'os._Environ'>: "

    with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                    side_effect=fake_impl_flags), \
            mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                       return_value=0), \
            pytest.raises(Exception, match=expected_message):
        mpi_run(base_settings, None, os.environ, command)
def test_mpi_run_minimal(self):
    """Check the mpirun command and env built by mpi_run from minimal settings.

    PYTHONPATH, if present in the recorded env, is discarded before the exact
    comparison because its value is not known to this test.
    """
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd']
    base_settings = self.minimal_settings

    def fake_impl_flags(tcp, env=None):
        # Stand-in for _get_mpi_implementation_flags.
        return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

    with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                    side_effect=fake_impl_flags), \
            mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                       return_value=0) as execute:
        mpi_run(base_settings, None, {}, command)

        # Ask the patched function for the flags the command should contain.
        flags, binding = horovod.run.mpi_run._get_mpi_implementation_flags(False)
        self.assertIsNotNone(flags)

        expected_cmd = ' '.join([
            'mpirun',
            '--allow-run-as-root --tag-output',
            '-np 2 -H localhost:2',
            ' '.join(binding),
            ' '.join(flags),
            'cmd',
        ])

        # Drop PYTHONPATH from the recorded env when it is there; its exact
        # value cannot be predicted from inside this test.
        self.assertIn('env', execute.call_args.kwargs)
        execute.call_args.kwargs['env'].pop('PYTHONPATH', None)

        expected_env = {'PATH': os.environ.get('PATH')}
        execute.assert_called_once_with(expected_cmd,
                                        env=expected_env,
                                        stdout=None,
                                        stderr=None)
def test_mpi_run_on_large_cluster(self):
    """Check the extra rsh flags added once num_hosts reaches the threshold.

    Uses a copy of the minimal settings with num_hosts set to
    large_cluster_threshold, and expects the two `-mca plm_rsh_*` options to
    follow the implementation flags in the command.
    """
    if not mpi_available():
        self.skipTest("MPI is not available")

    command = ['cmd']
    big_settings = copy.copy(self.minimal_settings)
    big_settings.num_hosts = large_cluster_threshold

    def fake_impl_flags(tcp, env=None):
        # Stand-in for _get_mpi_implementation_flags.
        return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

    with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                    side_effect=fake_impl_flags), \
            mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                       return_value=0) as execute:
        mpi_run(big_settings, None, {}, command)

        # Ask the patched function for the flags, then add what a large
        # cluster is expected to contribute.
        flags, binding = horovod.run.mpi_run._get_mpi_implementation_flags(False)
        self.assertIsNotNone(flags)
        flags.extend([
            '-mca plm_rsh_no_tree_spawn true',
            '-mca plm_rsh_num_concurrent {}'.format(big_settings.num_hosts),
        ])

        expected_cmd = ' '.join([
            'mpirun',
            '--allow-run-as-root --tag-output',
            '-np 2 -H host',
            ' '.join(binding),
            ' '.join(flags),
            'cmd',
        ])
        expected_env = {'PATH': os.environ.get('PATH')}
        execute.assert_called_once_with(expected_cmd,
                                        env=expected_env,
                                        stdout=None,
                                        stderr=None)