예제 #1
0
    def test_mpi_run_full(self):
        """Check the command and env mpi_run passes on for fully populated settings.

        The MPI implementation flag lookup and the shell executor are both
        patched, so the test only inspects the arguments handed to the
        mocked ``safe_shell_exec.execute``.
        """
        if not mpi_available():
            self.skipTest("MPI is not available")

        def fake_impl_flags(tcp, env=None):
            # fixed implementation flags, empty second element
            return ["--mock-mpi-impl-flags"], []

        command = ['cmd', 'arg1', 'arg2']
        interfaces = ['eth0', 'eth1']
        extra_env = {'env1': 'val1', 'env2': 'val2'}
        out_stream = '<stdout>'
        err_stream = '<stderr>'
        start_timeout = timeout.Timeout(
            5, message='Timed out waiting for something.')
        settings = hvd_settings.Settings(
            verbose=0,
            ssh_port=1022,
            extra_mpi_args='>mpi-extra args go here<',
            binding_args='>binding args go here<',
            key=secret.make_secret_key(),
            start_timeout=start_timeout,
            num_hosts=1,
            num_proc=1,
            hosts='>host names go here<',
            output_filename='>output filename goes here<',
            run_func_mode=True)

        with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                        side_effect=fake_impl_flags) as impl_mock,\
             mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=0) as execute_mock:
            mpi_run(settings, interfaces, extra_env, command,
                    stdout=out_stream, stderr=err_stream)

            # the flag lookup must have been hit exactly once, with our env
            impl_mock.assert_called_once_with(None, env=extra_env)

            # ask the (still patched) lookup for the flags ourselves so the
            # expected command is built from the same values
            mpi_flags, _ = horovod.run.mpi_run._get_mpi_implementation_flags(
                False)
            self.assertIsNotNone(mpi_flags)

            command_template = (
                'mpirun '
                '--allow-run-as-root --tag-output '
                '-np 1 -H >host names go here< '
                '>binding args go here< '
                '{mpi_flags} '
                '-mca plm_rsh_args "-p 1022" '
                '-mca btl_tcp_if_include eth0,eth1 -x NCCL_SOCKET_IFNAME=eth0,eth1 '
                '--output-filename >output filename goes here< '
                '-x env1 -x env2 '
                '>mpi-extra args go here< '
                'cmd arg1 arg2')
            joined_flags = ' '.join(mpi_flags)
            expected_command = command_template.format(mpi_flags=joined_flags)

            # the given env entries plus the current PATH
            expected_env = {
                'env1': 'val1',
                'env2': 'val2',
                'PATH': os.environ.get('PATH'),
            }
            execute_mock.assert_called_once_with(expected_command,
                                                 env=expected_env,
                                                 stdout=out_stream,
                                                 stderr=err_stream)
예제 #2
0
    def test_mpi_run_minimal(self):
        """Check the command and env mpi_run passes on for ``self.minimal_settings``.

        The flag lookup and the shell executor are patched; the test compares
        the arguments received by the mocked executor with the expected
        ``mpirun`` command line.
        """
        if not mpi_available():
            self.skipTest("MPI is not available")

        def fake_impl_flags(tcp):
            return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

        command = ['cmd']
        settings = self.minimal_settings

        with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                        side_effect=fake_impl_flags),\
             mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=0) as execute_mock:
            mpi_run(settings, None, {}, command)

            # fetch flags and binding args from the patched lookup
            mpi_flags, binding_args = \
                horovod.run.mpi_run._get_mpi_implementation_flags(False)
            self.assertIsNotNone(mpi_flags)

            command_template = ('mpirun '
                                '--allow-run-as-root --tag-output '
                                '-np 2 -H host '
                                '{binding_args} '
                                '{mpi_flags}       '
                                'cmd')
            expected_cmd = command_template.format(
                binding_args=' '.join(binding_args),
                mpi_flags=' '.join(mpi_flags))

            # an empty env argument is expected to yield just the current PATH
            expected_env = {'PATH': os.environ.get('PATH')}
            execute_mock.assert_called_once_with(expected_cmd,
                                                 env=expected_env,
                                                 stdout=None,
                                                 stderr=None)
예제 #3
0
    def test_run_failure(self, controller, mode, run):
        """Run the controller-failure scenario unless the controller is unusable.

        The test is skipped for ``'gloo'`` when Gloo is not built, and for
        ``'mpi'`` when MPI is not built, not available, or is MPICH. Otherwise
        all three arguments are forwarded unchanged to
        ``do_test_run_with_controller_failure``.
        """
        if controller == 'gloo':
            if not gloo_built():
                self.skipTest("Gloo is not available")
        elif controller == 'mpi':
            # either missing piece makes MPI unusable for this test
            if not mpi_built() or not mpi_available():
                self.skipTest("MPI is not available")
            if is_mpich():
                self.skipTest("MPICH is not testable")

        self.do_test_run_with_controller_failure(controller, mode, run)
예제 #4
0
    def test_mpi_run_with_non_zero_exit(self):
        """Expect mpi_run to raise RuntimeError when the executor returns 1.

        The shell executor is patched to return exit code 1 and the flag
        lookup is patched to return no flags.
        """
        if not mpi_available():
            self.skipTest("MPI is not available")

        def no_impl_flags(tcp):
            return [], []

        command = ['cmd']
        settings = self.minimal_settings

        with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                        side_effect=no_impl_flags),\
             mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=1),\
             pytest.raises(RuntimeError,
                           match="^mpirun failed with exit code 1$"):
            mpi_run(settings, None, {}, command)
예제 #5
0
    def test_mpi_run_on_large_cluster(self):
        """Check the command mpi_run builds when num_hosts equals large_cluster_threshold.

        On top of the mocked implementation flags, the expected command
        carries ``-mca plm_rsh_no_tree_spawn true`` and
        ``-mca plm_rsh_num_concurrent <num_hosts>``. PYTHONPATH is checked
        separately and removed from the recorded env before the exact
        comparison.
        """
        if not mpi_available():
            self.skipTest("MPI is not available")

        def fake_impl_flags(tcp, env=None):
            return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

        command = ['cmd']
        # work on a copy so self.minimal_settings itself is not modified
        settings = copy.copy(self.minimal_settings)
        settings.num_hosts = large_cluster_threshold

        with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                        side_effect=fake_impl_flags),\
             mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=0) as execute_mock:
            mpi_run(settings, None, {}, command)

            # fetch flags and binding args from the patched lookup
            mpi_flags, binding_args = \
                horovod.run.mpi_run._get_mpi_implementation_flags(False)
            self.assertIsNotNone(mpi_flags)
            mpi_flags.extend([
                '-mca plm_rsh_no_tree_spawn true',
                '-mca plm_rsh_num_concurrent {}'.format(settings.num_hosts),
            ])

            command_template = ('mpirun '
                                '--allow-run-as-root --tag-output '
                                '-np 2 -H localhost:2 '
                                '{binding_args} '
                                '{mpi_flags}       '
                                'cmd')
            expected_cmd = command_template.format(
                binding_args=' '.join(binding_args),
                mpi_flags=' '.join(mpi_flags))

            # The exact PYTHONPATH value cannot be known up front, so it
            # cannot go through assert_called_once_with: verify it on its
            # own and drop it from the recorded env.
            recorded_kwargs = execute_mock.call_args.kwargs
            self.assertIn('env', recorded_kwargs)
            recorded_env = recorded_kwargs['env']
            self.assertIn('PYTHONPATH', recorded_env)
            actual_python_path = recorded_env.pop('PYTHONPATH')
            self.assertIn(actual_python_path, os.pathsep.join(sys.path))

            expected_env = {'PATH': os.environ.get('PATH')}
            execute_mock.assert_called_once_with(expected_cmd,
                                                 env=expected_env,
                                                 stdout=None,
                                                 stderr=None)
예제 #6
0
파일: test_run.py 프로젝트: zpcalan/horovod
    def do_test_mpi_run_env_override(self, sysenv, argenv, env_var, expected):
        """Assert the value of one variable in the env mpi_run gives the executor.

        ``sysenv`` is applied through ``override_env`` while ``mpi_run`` is
        called with ``argenv`` as its env argument. The env recorded by the
        mocked executor must then map ``env_var`` to ``expected`` (looked up
        with ``dict.get``, so a missing key compares as ``None``).
        """
        if not mpi_available():
            self.skipTest("MPI is not available")

        def fake_impl_flags(tcp, env=None):
            return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

        command = ['cmd']
        settings = self.minimal_settings

        with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                        side_effect=fake_impl_flags):
            with mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                            return_value=0) as execute_mock:
                with override_env(sysenv):
                    mpi_run(settings, None, argenv, command)

                    # inspect the env keyword the executor was called with
                    recorded_kwargs = execute_mock.call_args.kwargs
                    self.assertIn('env', recorded_kwargs)
                    actual_value = recorded_kwargs['env'].get(env_var)
                    self.assertEqual(actual_value, expected)
예제 #7
0
파일: test_run.py 프로젝트: zpcalan/horovod
    def test_mpi_run_with_os_environ(self):
        """Expect mpi_run to raise when ``os.environ`` is passed as the env argument.

        The raised exception's message must start with the "env argument must
        be a dict" text matched below.
        """
        if not mpi_available():
            self.skipTest("MPI is not available")

        def fake_impl_flags(tcp, env=None):
            return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

        command = ['cmd']
        settings = self.minimal_settings
        expected_message = \
            "^env argument must be a dict, not <class 'os._Environ'>: "

        with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                        side_effect=fake_impl_flags),\
             mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=0),\
             pytest.raises(Exception, match=expected_message):
            mpi_run(settings, None, os.environ, command)
예제 #8
0
파일: test_run.py 프로젝트: zpcalan/horovod
    def test_mpi_run_minimal(self):
        """Check the command and env mpi_run passes on for ``self.minimal_settings``.

        The flag lookup and the shell executor are patched. Any PYTHONPATH
        entry is removed from the recorded env before the exact comparison
        against the expected ``mpirun`` command line and env.
        """
        if not mpi_available():
            self.skipTest("MPI is not available")

        def fake_impl_flags(tcp, env=None):
            return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

        command = ['cmd']
        settings = self.minimal_settings

        with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                        side_effect=fake_impl_flags),\
             mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=0) as execute_mock:
            mpi_run(settings, None, {}, command)

            # fetch flags and binding args from the patched lookup
            mpi_flags, binding_args = \
                horovod.run.mpi_run._get_mpi_implementation_flags(False)
            self.assertIsNotNone(mpi_flags)

            command_template = ('mpirun '
                                '--allow-run-as-root --tag-output '
                                '-np 2 -H localhost:2 '
                                '{binding_args} '
                                '{mpi_flags}       '
                                'cmd')
            expected_cmd = command_template.format(
                binding_args=' '.join(binding_args),
                mpi_flags=' '.join(mpi_flags))

            # The exact PYTHONPATH value cannot be known here, so drop it
            # from the recorded env if present; its handling is covered by
            # the test_mpi_run_*pythonpath* tests.
            recorded_kwargs = execute_mock.call_args.kwargs
            self.assertIn('env', recorded_kwargs)
            recorded_kwargs['env'].pop('PYTHONPATH', None)

            expected_env = {'PATH': os.environ.get('PATH')}
            execute_mock.assert_called_once_with(expected_cmd,
                                                 env=expected_env,
                                                 stdout=None,
                                                 stderr=None)
예제 #9
0
    def test_mpi_run_on_large_cluster(self):
        """Check the command mpi_run builds when num_hosts equals large_cluster_threshold.

        On top of the mocked implementation flags, the expected command
        carries ``-mca plm_rsh_no_tree_spawn true`` and
        ``-mca plm_rsh_num_concurrent <num_hosts>``.
        """
        if not mpi_available():
            self.skipTest("MPI is not available")

        def fake_impl_flags(tcp, env=None):
            return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]

        command = ['cmd']
        # work on a copy so self.minimal_settings itself is not modified
        settings = copy.copy(self.minimal_settings)
        settings.num_hosts = large_cluster_threshold

        with mock.patch("horovod.run.mpi_run._get_mpi_implementation_flags",
                        side_effect=fake_impl_flags),\
             mock.patch("horovod.run.mpi_run.safe_shell_exec.execute",
                        return_value=0) as execute_mock:
            mpi_run(settings, None, {}, command)

            # fetch flags and binding args from the patched lookup
            mpi_flags, binding_args = \
                horovod.run.mpi_run._get_mpi_implementation_flags(False)
            self.assertIsNotNone(mpi_flags)
            mpi_flags.extend([
                '-mca plm_rsh_no_tree_spawn true',
                '-mca plm_rsh_num_concurrent {}'.format(settings.num_hosts),
            ])

            command_template = ('mpirun '
                                '--allow-run-as-root --tag-output '
                                '-np 2 -H host '
                                '{binding_args} '
                                '{mpi_flags}       '
                                'cmd')
            expected_cmd = command_template.format(
                binding_args=' '.join(binding_args),
                mpi_flags=' '.join(mpi_flags))

            expected_env = {'PATH': os.environ.get('PATH')}
            execute_mock.assert_called_once_with(expected_cmd,
                                                 env=expected_env,
                                                 stdout=None,
                                                 stderr=None)