def test_allocate_job(self, mock_spawn):
        """
        Drive ``_allocate_job`` through one successful run and three failures.

        ``mock_spawn`` stands in for the SLURM frontend spawner; the value
        returned by its ``expect`` and its ``after`` attribute select the case.
        """
        self.instance._spawn_ssh_SLURM_frontend = mock_spawn
        frontend = mock_spawn()
        frontend.sendline = Mock()

        # Success: expect() yields 1 and the reported job state is RUNNING
        frontend.expect = Mock(return_value=1)
        frontend.after = 'JobState=RUNNING'
        mock_spawn.call_count = 0  # discard the calls made while configuring
        self.instance._allocate_job(False)
        self.assertEqual(mock_spawn.call_count, 1)
        self.assertNotEqual(frontend.sendline.call_count, 0)

        # Failures; 'after' keeps a non-RUNNING state for all of them:
        #   1 -> job not running
        #   2 -> Kerberos authentication missing
        #   3 -> general error
        # NOTE(review): _allocate_job is called without the argument used in
        # the success case above; if that parameter is mandatory, the resulting
        # TypeError alone would satisfy assertRaises(Exception) -- confirm.
        frontend.after = 'JobState=WRONG'
        for expect_result in (1, 2, 3):
            frontend.expect = Mock(return_value=expect_result)
            with self.assertRaises(Exception):
                self.instance._allocate_job()

        self.instance = LuganoVizCluster(processes=4, gpus=1)
    def test_stop(self, mock_spawn):
        """
        ``stop`` must invoke remote file cleanup and job deallocation once each.
        """
        cleanup = Mock()
        deallocate = Mock()
        self.instance._clean_remote_files = cleanup
        self.instance._deallocate_job = deallocate

        allocation = mock_spawn()
        self.instance._allocation_process = allocation
        allocation.sendline = Mock()

        self.instance.stop()

        self.assertEqual(cleanup.call_count, 1)
        self.assertEqual(deallocate.call_count, 1)

        self.instance = LuganoVizCluster(processes=4, gpus=1)
    def test_deallocate_job(self, mock_spawn):
        """
        ``_deallocate_job`` must send an 'exit' line to the allocation process,
        set the state to 'UNDEFINED' and clear the allocation process.
        """
        allocation = mock_spawn()
        self.instance._allocation_process = allocation
        allocation.sendline = Mock()

        self.instance._deallocate_job()

        # at least one recorded sendline call has to mention 'exit'
        sent = [str(call) for call in allocation.sendline.mock_calls]
        self.assertTrue(any('exit' in text for text in sent))
        self.assertEqual('UNDEFINED', self.instance._state)
        self.assertIsNone(self.instance._allocation_process)

        self.instance = LuganoVizCluster(processes=4, gpus=1)
    def test_clean_remote_files(self, mock_spawn):
        """
        ``_clean_remote_files`` must open exactly one node connection and send
        at least one line through it.
        """
        self.instance._spawn_ssh_node = mock_spawn
        node_process = mock_spawn()
        node_process.sendline = Mock()

        # state required for a successful call
        self.instance._node = 'something that won\'t be used'
        self.instance._tmp_dir = 'foo'
        self.instance._allocation_process = node_process

        mock_spawn.call_count = 0  # discard the calls made while configuring
        self.instance._clean_remote_files()
        self.assertEqual(mock_spawn.call_count, 1)
        self.assertNotEqual(node_process.sendline.call_count, 0)

        self.instance = LuganoVizCluster(processes=4, gpus=1)
    def test_configure_environment(self, mock_os_environ):
        """
        ``_configure_environment`` must look up two environment values and
        send exactly three lines to the given process: the ENVIRONMENT export
        and the two variables expected from the 'nrp-variables' file located
        next to this test module.
        """
        variables_file = os.path.join(os.path.dirname(__file__), 'nrp-variables')
        # successive environ.get() results: the environment name, then the path
        mock_os_environ.get = Mock(side_effect=['staging', variables_file])

        process = Mock()
        process.sendline = Mock()

        self.instance._configure_environment(process)

        self.assertEqual(mock_os_environ.get.call_count, 2)
        for expected_line in ('export ENVIRONMENT=staging',
                              'ADDITIONAL_PACKAGE_VERSION=1.1.0',
                              'SMALL_PACKAGE_VERSION=2.5.5'):
            process.sendline.assert_any_call(expected_line)
        self.assertEqual(process.sendline.call_count, 3)
        # NOTE(review): this compares the mock attribute itself with 0, so it
        # can never fail; '.call_count' may have been intended -- confirm.
        self.assertNotEqual(process.expect, 0)

        self.instance = LuganoVizCluster(processes=4, gpus=1)
    def test_models_path(self, mock_spawn, mock_system):
        """
        ``_copy_to_remote`` must spawn one process, read two lines from it,
        issue the expected scp command through ``mock_system`` and raise for
        every non-zero return code tried below.
        """
        source_path = '/somewhere/over/the/rainbow'

        mock_system.return_value = 0
        self.instance = LuganoVizCluster(processes=4, gpus=1)
        self.instance._node = 'not none'
        self.instance._allocation_process = 'this neither'

        remote = mock_spawn()
        remote.expect.return_value = 0
        remote.readline.return_value = '/somewhere/under/the/rainbow'
        mock_spawn.call_count = 0  # discard the calls made while configuring

        self.instance._copy_to_remote(source_path)
        self.assertEqual(mock_spawn.call_count, 1)
        self.assertNotEqual(remote.sendline.call_count, 0)
        self.assertNotEqual(remote.expect.call_count, 0)
        self.assertEqual(remote.readline.call_count, 2)

        expected_scp = (
            'scp -r /somewhere/over/the/rainbow bbpnrsoa@not none.cscs.ch:/somewhere/under/the/rainbow'
        ).split()
        mock_system.assert_any_call(expected_scp)

        # check error cases
        for return_code in (9, 10, 65, 66, 42):
            with self.assertRaises(Exception):
                mock_system.return_value = return_code
                self.instance._copy_to_remote(source_path)

        self.instance = LuganoVizCluster(processes=4, gpus=1)
# Example #7
# 0
    def stop(self):
        """
        Shut down, in order: the local watchdog client, the remote Gazebo and
        Xvnc processes, the SLURM allocation (through LuganoVizCluster.stop)
        and finally the local X server process.

        Errors raised while cleaning up the cluster node are logged and
        swallowed so that the job release further down is still attempted.
        """
        # the local watchdog client goes first, before the remote side is killed
        watchdog = self._watchdog_client
        if watchdog is not None:
            watchdog.stop()
            self._watchdog_client = None

        # cluster node cleanup: may fail, but must never prevent the job release
        try:
            gazebo = self._gazebo_remote_process
            if gazebo:
                # kill the remote watchdog, gzserver and the invoking bash shell
                notificator.info('Stopping Gazebo server on the cluster node')
                gazebo.sendcontrol('z')
                for command in ('kill -v -n 9 $WATCHDOG_PID',
                                'killall -v -9 gzserver'):
                    gazebo.sendline(command)
                outcomes = [pexpect.TIMEOUT,
                            'Killed',
                            'gzserver: no process killed']
                gazebo.expect(outcomes, self.TIMEOUT)
                gazebo.terminate()

            xvnc = self._remote_xvnc_process
            if xvnc:
                # Xvnc was not started through bash, so terminate it directly
                notificator.info('Stopping cluster node graphics server')
                xvnc.terminate()

        # pylint: disable=broad-except
        except Exception:
            logger.exception('Error cleaning up cluster node.')
        finally:
            # forget the remote handles whether or not the cleanup succeeded
            self._gazebo_remote_process = None
            self._remote_xvnc_process = None
            self._remote_display_port = -1

        # SLURM cleanup and temporary folder deletion; has to come after the
        # cluster cleanup above because it deallocates the process
        LuganoVizCluster.stop(self)

        # local Xvfb cleanup, not critical
        x_server = self._x_server_process
        if x_server:
            notificator.info('Stopping backend graphics server')
            x_server.terminate()
            self._x_server_process = None
 def setUp(self):
     """Create the LuganoVizCluster instance (4 processes, 1 GPU) under test."""
     cluster = LuganoVizCluster(processes=4, gpus=1)
     self.instance = cluster
class TestLuganoVizCluster(unittest.TestCase):
    """
    Unit tests for LuganoVizCluster in which process spawning, the system call
    and the environment lookup are replaced by mocks.
    """

    def setUp(self):
        """Give every test its own LuganoVizCluster (4 processes, 1 GPU)."""
        cluster = LuganoVizCluster(processes=4, gpus=1)
        self.instance = cluster

    @patch('pexpect.spawn')
    def test_spawn_ssh(self, mock_spawn):
        """
        ``_spawn_ssh`` must spawn exactly one process on success and raise when
        ``expect`` reports 1 (missing password) or 2 (timeout).
        """
        process = mock_spawn()
        process.sendline = Mock()

        # Successful call
        process.expect = Mock(return_value=0)
        mock_spawn.call_count = 0  # discard the calls made while configuring
        self.instance._spawn_ssh('foo')
        self.assertEqual(mock_spawn.call_count, 1)
        # NOTE(review): this compares the mock attribute itself with 0, so it
        # can never fail; '.call_count' may have been intended -- confirm.
        self.assertNotEqual(process.sendline, 0)

        # 1 -> missing password, 2 -> timeout
        for expect_result in (1, 2):
            process.expect = Mock(return_value=expect_result)
            with self.assertRaises(Exception):
                self.instance._spawn_ssh('foo')

    def test_spawn_ssh_slurm_frontend(self):
        """The SLURM frontend connection must target host 'bbpviz1'."""
        ssh_mock = Mock()
        with patch.object(self.instance, '_spawn_ssh', ssh_mock):
            self.instance._spawn_ssh_SLURM_frontend()
            ssh_mock.assert_called_with('bbpviz1')

    def test_spawn_ssh_node(self):
        """The node connection must target the host stored in ``_node``."""
        self.instance._node = 'fake_node'
        ssh_mock = Mock()
        with patch.object(self.instance, '_spawn_ssh', ssh_mock):
            self.instance._spawn_ssh_node()
            ssh_mock.assert_called_with('fake_node')

    @patch('pexpect.spawn')
    def test_allocate_job(self, mock_spawn):
        """
        Drive ``_allocate_job`` through one successful run and three failures.

        ``mock_spawn`` stands in for the SLURM frontend spawner; the value
        returned by its ``expect`` and its ``after`` attribute select the case.
        """
        self.instance._spawn_ssh_SLURM_frontend = mock_spawn
        frontend = mock_spawn()
        frontend.sendline = Mock()

        # Success: expect() yields 1 and the reported job state is RUNNING
        frontend.expect = Mock(return_value=1)
        frontend.after = 'JobState=RUNNING'
        mock_spawn.call_count = 0  # discard the calls made while configuring
        self.instance._allocate_job(False)
        self.assertEqual(mock_spawn.call_count, 1)
        self.assertNotEqual(frontend.sendline.call_count, 0)

        # Failures; 'after' keeps a non-RUNNING state for all of them:
        #   1 -> job not running
        #   2 -> Kerberos authentication missing
        #   3 -> general error
        # NOTE(review): _allocate_job is called without the argument used in
        # the success case above; if that parameter is mandatory, the resulting
        # TypeError alone would satisfy assertRaises(Exception) -- confirm.
        frontend.after = 'JobState=WRONG'
        for expect_result in (1, 2, 3):
            frontend.expect = Mock(return_value=expect_result)
            with self.assertRaises(Exception):
                self.instance._allocate_job()

        self.instance = LuganoVizCluster(processes=4, gpus=1)

    @patch('pexpect.spawn')
    def test_deallocate_job(self, mock_spawn):
        """
        ``_deallocate_job`` must send an 'exit' line to the allocation process,
        set the state to 'UNDEFINED' and clear the allocation process.
        """
        allocation = mock_spawn()
        self.instance._allocation_process = allocation
        allocation.sendline = Mock()

        self.instance._deallocate_job()

        # at least one recorded sendline call has to mention 'exit'
        sent = [str(call) for call in allocation.sendline.mock_calls]
        self.assertTrue(any('exit' in text for text in sent))
        self.assertEqual('UNDEFINED', self.instance._state)
        self.assertIsNone(self.instance._allocation_process)

        self.instance = LuganoVizCluster(processes=4, gpus=1)

    @patch('pexpect.spawn')
    def test_clean_remote_files(self, mock_spawn):
        """
        ``_clean_remote_files`` must open exactly one node connection and send
        at least one line through it.
        """
        self.instance._spawn_ssh_node = mock_spawn
        node_process = mock_spawn()
        node_process.sendline = Mock()

        # state required for a successful call
        self.instance._node = 'something that won\'t be used'
        self.instance._tmp_dir = 'foo'
        self.instance._allocation_process = node_process

        mock_spawn.call_count = 0  # discard the calls made while configuring
        self.instance._clean_remote_files()
        self.assertEqual(mock_spawn.call_count, 1)
        self.assertNotEqual(node_process.sendline.call_count, 0)

        self.instance = LuganoVizCluster(processes=4, gpus=1)

    @patch('subprocess.call')
    @patch('pexpect.spawn')
    def test_models_path(self, mock_spawn, mock_system):
        """
        ``_copy_to_remote`` must spawn one process, read two lines from it,
        issue the expected scp command through the patched ``subprocess.call``
        and raise for every non-zero return code tried below.
        """
        source_path = '/somewhere/over/the/rainbow'

        mock_system.return_value = 0
        self.instance = LuganoVizCluster(processes=4, gpus=1)
        self.instance._node = 'not none'
        self.instance._allocation_process = 'this neither'

        remote = mock_spawn()
        remote.expect.return_value = 0
        remote.readline.return_value = '/somewhere/under/the/rainbow'
        mock_spawn.call_count = 0  # discard the calls made while configuring

        self.instance._copy_to_remote(source_path)
        self.assertEqual(mock_spawn.call_count, 1)
        self.assertNotEqual(remote.sendline.call_count, 0)
        self.assertNotEqual(remote.expect.call_count, 0)
        self.assertEqual(remote.readline.call_count, 2)

        expected_scp = (
            'scp -r /somewhere/over/the/rainbow bbpnrsoa@not none.cscs.ch:/somewhere/under/the/rainbow'
        ).split()
        mock_system.assert_any_call(expected_scp)

        # check error cases
        for return_code in (9, 10, 65, 66, 42):
            with self.assertRaises(Exception):
                mock_system.return_value = return_code
                self.instance._copy_to_remote(source_path)

        self.instance = LuganoVizCluster(processes=4, gpus=1)

    @patch('hbp_nrp_commons.cluster.LuganoVizCluster.os.environ')
    def test_configure_environment(self, mock_os_environ):
        """
        ``_configure_environment`` must look up two environment values and
        send exactly three lines to the given process: the ENVIRONMENT export
        and the two variables expected from the 'nrp-variables' file located
        next to this test module.
        """
        variables_file = os.path.join(os.path.dirname(__file__), 'nrp-variables')
        # successive environ.get() results: the environment name, then the path
        mock_os_environ.get = Mock(side_effect=['staging', variables_file])

        process = Mock()
        process.sendline = Mock()

        self.instance._configure_environment(process)

        self.assertEqual(mock_os_environ.get.call_count, 2)
        for expected_line in ('export ENVIRONMENT=staging',
                              'ADDITIONAL_PACKAGE_VERSION=1.1.0',
                              'SMALL_PACKAGE_VERSION=2.5.5'):
            process.sendline.assert_any_call(expected_line)
        self.assertEqual(process.sendline.call_count, 3)
        # NOTE(review): this compares the mock attribute itself with 0, so it
        # can never fail; '.call_count' may have been intended -- confirm.
        self.assertNotEqual(process.expect, 0)

        self.instance = LuganoVizCluster(processes=4, gpus=1)

    @patch('pexpect.spawn')
    def test_stop(self, mock_spawn):
        """
        ``stop`` must invoke remote file cleanup and job deallocation once each.
        """
        cleanup = Mock()
        deallocate = Mock()
        self.instance._clean_remote_files = cleanup
        self.instance._deallocate_job = deallocate

        allocation = mock_spawn()
        self.instance._allocation_process = allocation
        allocation.sendline = Mock()

        self.instance.stop()

        self.assertEqual(cleanup.call_count, 1)
        self.assertEqual(deallocate.call_count, 1)

        self.instance = LuganoVizCluster(processes=4, gpus=1)

    @patch('pexpect.spawn')
    def test_stop_except(self, mock_spawn):
        """
        ``stop`` must still deallocate the job when the remote file cleanup
        raises, and must not let that exception escape.
        """
        failing_cleanup = Mock(side_effect=Exception('foo'))
        deallocate = Mock()
        self.instance._clean_remote_files = failing_cleanup
        self.instance._deallocate_job = deallocate

        allocation = mock_spawn()
        self.instance._allocation_process = allocation
        allocation.sendline = Mock()

        self.instance.stop()

        self.assertEqual(failing_cleanup.call_count, 1)
        self.assertEqual(deallocate.call_count, 1)

        self.instance = LuganoVizCluster(processes=4, gpus=1)