def test_drain_machine(self):
    """Check ``drain_machine`` for known, unknown and failing drones.

    Covers four situations:

    * a known drone results in the expected ``condor_drain`` command,
    * an unknown drone returns ``None``,
    * a failure with exit code 1 is expected to be logged at WARNING level
      while the call still returns ``None``,
    * a failure with exit code 2 is expected to be logged at CRITICAL level
      and re-raised as ``CommandExecutionFailure``.
    """
    run_command = self.mock_executor.return_value.run_command

    run_async(self.htcondor_adapter.drain_machine, drone_uuid="test")
    run_command.assert_called_with(
        "condor_drain -pool my-htcondor.local -test -graceful slot1@test"
    )
    self.mock_executor.reset_mock()

    run_async(self.htcondor_adapter.drain_machine, drone_uuid="test_uuid")
    run_command.assert_called_with(
        "condor_drain -pool my-htcondor.local -test -graceful slot1@test_uuid@test"
    )

    self.assertIsNone(
        run_async(self.htcondor_adapter.drain_machine, drone_uuid="not_exists")
    )

    # The executor mock outlives this test method, so the failure injection
    # must be removed even when one of the assertions below fails.
    try:
        run_command.side_effect = CommandExecutionFailure(
            message="Does not exists", exit_code=1, stderr="Does not exists"
        )
        with self.assertLogs(level=logging.WARNING):
            self.assertIsNone(
                run_async(self.htcondor_adapter.drain_machine, drone_uuid="test")
            )

        run_command.side_effect = CommandExecutionFailure(
            message="Unhandled error", exit_code=2, stderr="Unhandled error"
        )
        with self.assertRaises(CommandExecutionFailure):
            with self.assertLogs(level=logging.CRITICAL):
                # The call is expected to raise, so there is no return value
                # that could be checked here.
                run_async(self.htcondor_adapter.drain_machine, drone_uuid="test")
    finally:
        run_command.side_effect = None
def test_exception_handling(self):
    """Check the exception translation done by ``handle_exceptions``.

    Each entry pairs something to raise inside the context manager with the
    exception type that is expected to come out of it.
    """
    command_failure = CommandExecutionFailure(
        message="Run test command",
        exit_code=1,
        stdout="Test",
        stderr="Test",
    )
    ssh_failure = asyncssh.Error(code=255, reason="Test", lang="Test")

    expectations = (
        (asyncio.TimeoutError(), TardisTimeout),
        (ssh_failure, TardisResourceStatusUpdateFailed),
        (IndexError, TardisResourceStatusUpdateFailed),
        (TardisResourceStatusUpdateFailed, TardisResourceStatusUpdateFailed),
        (command_failure, TardisResourceStatusUpdateFailed),
        (Exception, TardisError),
    )

    for raised, expected in expectations:
        with self.assertRaises(expected):
            with self.moab_adapter.handle_exceptions():
                raise raised
def test_exception_handling(self):
    """Check the exception translation done by ``handle_exceptions``.

    Each entry pairs something to raise inside the context manager with the
    exception type that is expected to come out of it.
    """
    command_failure = CommandExecutionFailure(
        message="Test", exit_code=255, stdout="Test", stderr="Test"
    )

    expectations = (
        (asyncio.TimeoutError(), TardisTimeout),
        (command_failure, TardisResourceStatusUpdateFailed),
        (TardisResourceStatusUpdateFailed, TardisResourceStatusUpdateFailed),
        (Exception, TardisError),
    )

    for raised, expected in expectations:
        with self.assertRaises(expected):
            with self.slurm_adapter.handle_exceptions():
                raise raised
def test_drain_machine(self):
    """Check ``drain_machine`` for a known, an unknown and a failing node.

    A known drone must result in the expected ``scontrol update`` command, an
    unknown drone returns ``None``, and a ``CommandExecutionFailure`` raised
    by the command runner is expected to propagate to the caller.
    """
    run_async(self.slurm_adapter.drain_machine, drone_uuid="VM-1")
    self.mock_async_run_command.assert_called_with(
        "scontrol update NodeName=host-10-18-1-1 State=DRAIN Reason='COBalD/TARDIS'"
    )
    self.mock_async_run_command.reset_mock()

    self.assertIsNone(
        run_async(self.slurm_adapter.drain_machine, drone_uuid="not_exists")
    )

    self.mock_async_run_command.side_effect = CommandExecutionFailure(
        message="Does not exists", exit_code=1, stderr="Does not exists"
    )
    # Always remove the failure injection again, even if the expectation
    # below is not met, so the mock does not keep raising afterwards.
    try:
        with self.assertRaises(CommandExecutionFailure):
            # The call is expected to raise, so there is no return value that
            # could be checked here.
            run_async(self.slurm_adapter.drain_machine, drone_uuid="idle_m")
    finally:
        self.mock_async_run_command.side_effect = None
def test_get_machine_status(self):
    """Check the machine status mapping and the failure path of the updater.

    The first part asserts the ``MachineStatus`` reported for a set of drone
    uuids. The second part makes the executor raise and expects
    ``slurm_status_updater`` to log at WARNING level and re-raise the
    ``CommandExecutionFailure``.
    """
    state_mapping = {
        "VM-1": MachineStatus.Available,
        "not_exists": MachineStatus.NotAvailable,
        "draining_m": MachineStatus.Draining,
        "idle_m": MachineStatus.Available,
        "drained_m": MachineStatus.NotAvailable,
        "pwr_up_m": MachineStatus.NotAvailable,
    }
    for machine, state in state_mapping.items():
        self.assertEqual(
            run_async(self.slurm_adapter.get_machine_status, drone_uuid=machine),
            state,
        )

    self.mock_executor.reset_mock()

    attributes = {
        "Machine": "Machine",
        "State": "State",
        "Activity": "Activity",
        "TardisDroneUuid": "TardisDroneUuid",
    }

    run_command = self.mock_executor.return_value.run_command
    run_command.side_effect = CommandExecutionFailure(
        message="Test", exit_code=123, stderr="Test"
    )
    # Always remove the failure injection again, even if an expectation below
    # is not met, so the executor mock does not keep raising afterwards.
    try:
        with self.assertLogs(level="WARN"):
            with self.assertRaises(CommandExecutionFailure):
                run_async(
                    partial(
                        slurm_status_updater,
                        self.config.BatchSystem.options,
                        attributes,
                        self.mock_executor.return_value,
                    )
                )
                # NOTE(review): when the call above raises as expected, this
                # assertion is never reached - confirm the intended placement.
                run_command.assert_called_with(self.command)
    finally:
        run_command.side_effect = None
class TestSlurmAdapter(TestCase):
    """Tests for ``SlurmAdapter`` with configuration and shell executor patched.

    Both patches are started once per class, so the executor mock is shared by
    all test methods of this class.
    """

    mock_config_patcher = None
    mock_executor_patcher = None

    def check_attribute_dicts(
        self, expected_attributes, returned_attributes, exclude=tuple()
    ):
        """Assert that both objects agree on every expected key.

        :param expected_attributes: mapping whose keys define what is compared
        :param returned_attributes: object under test, read via ``getattr``
        :param exclude: keys that are skipped during the comparison
        """
        for key in expected_attributes.keys():
            if key in exclude:
                continue
            self.assertEqual(
                getattr(returned_attributes, key),
                getattr(expected_attributes, key),
            )

    @classmethod
    def setUpClass(cls):
        cls.mock_config_patcher = patch("tardis.interfaces.siteadapter.Configuration")
        cls.mock_config = cls.mock_config_patcher.start()
        cls.mock_executor_patcher = patch("tardis.adapters.sites.slurm.ShellExecutor")
        cls.mock_executor = cls.mock_executor_patcher.start()

    @classmethod
    def tearDownClass(cls):
        cls.mock_config_patcher.stop()
        cls.mock_executor_patcher.stop()

    def setUp(self):
        """Install a fresh site configuration and build the adapter under test."""
        config = self.mock_config.return_value
        # Restrict the site configuration to these attributes; anything else
        # raises AttributeError when accessed.
        config.TestSite = MagicMock(
            spec=[
                "MachineMetaData",
                "StatusUpdate",
                "MachineTypeConfiguration",
                "executor",
            ]
        )
        self.test_site_config = config.TestSite
        self.test_site_config.MachineMetaData = self.machine_meta_data
        self.test_site_config.StatusUpdate = 10
        self.test_site_config.MachineTypeConfiguration = (
            self.machine_type_configuration
        )
        self.test_site_config.executor = self.mock_executor.return_value

        self.slurm_adapter = SlurmAdapter(
            machine_type="test2large", site_name="TestSite"
        )

    def tearDown(self):
        pass

    @property
    def machine_meta_data(self):
        return AttributeDict(test2large=AttributeDict(Cores=20, Memory=62, Disk=100))

    @property
    def machine_type_configuration(self):
        return AttributeDict(
            test2large=AttributeDict(
                Partition="normal", StartupCommand="pilot.sh", Walltime="60"
            )
        )

    @property
    def resource_attributes(self):
        return AttributeDict(
            machine_type="test2large",
            site_name="TestSite",
            remote_resource_uuid=1390065,
            resource_status=ResourceStatus.Booting,
            drone_uuid="testsite-1390065",
        )

    def test_start_up_command_deprecation_warning(self):
        """A site-level ``StartupCommand`` is accepted with a DeprecationWarning."""
        # Necessary to avoid annoying message in PyCharm
        filterwarnings(action="ignore", message="unclosed", category=ResourceWarning)
        del self.test_site_config.MachineTypeConfiguration.test2large.StartupCommand

        # Without any StartupCommand the adapter cannot be constructed.
        with self.assertRaises(AttributeError):
            self.slurm_adapter = SlurmAdapter(
                machine_type="test2large", site_name="TestSite"
            )

        self.test_site_config.StartupCommand = "pilot.sh"

        with self.assertWarns(DeprecationWarning):
            self.slurm_adapter = SlurmAdapter(
                machine_type="test2large", site_name="TestSite"
            )

    @mock_executor_run_command(TEST_DEPLOY_RESOURCE_RESPONSE)
    def test_deploy_resource(self):
        """Check returned attributes and the ``sbatch`` call for several memory sizes."""
        run_command = self.mock_executor.return_value.run_command

        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(
            created=datetime.now(), updated=datetime.now()
        )

        resource_attributes = AttributeDict(
            machine_type="test2large",
            site_name="TestSite",
            obs_machine_meta_data_translation_mapping=AttributeDict(
                Cores=1,
                Memory=1024,
                Disk=1024,
            ),
            drone_uuid="testsite-1390065",
        )

        returned_resource_attributes = run_async(
            self.slurm_adapter.deploy_resource, resource_attributes
        )

        self.assertLess(
            returned_resource_attributes.created
            - expected_resource_attributes.created,
            timedelta(seconds=1),
        )

        self.check_attribute_dicts(
            expected_resource_attributes,
            returned_resource_attributes,
            exclude=("created", "updated"),
        )

        run_command.assert_called_with(
            "sbatch -p normal -N 1 -n 20 -t 60 --mem=63488mb --export=SLURM_Walltime=60,TardisDroneCores=20,TardisDroneMemory=63488,TardisDroneDisk=102400,TardisDroneUuid=testsite-1390065 pilot.sh"  # noqa: B950
        )

        # Fractional memory values
        self.mock_executor.reset_mock()
        self.test_site_config.MachineMetaData.test2large.Memory = 2.5
        run_async(self.slurm_adapter.deploy_resource, resource_attributes)
        run_command.assert_called_with(
            "sbatch -p normal -N 1 -n 20 -t 60 --mem=2560mb --export=SLURM_Walltime=60,TardisDroneCores=20,TardisDroneMemory=2560,TardisDroneDisk=102400,TardisDroneUuid=testsite-1390065 pilot.sh"  # noqa: B950
        )

        self.mock_executor.reset_mock()
        self.test_site_config.MachineMetaData.test2large.Memory = 2.546372129
        run_async(self.slurm_adapter.deploy_resource, resource_attributes)
        run_command.assert_called_with(
            "sbatch -p normal -N 1 -n 20 -t 60 --mem=2607mb --export=SLURM_Walltime=60,TardisDroneCores=20,TardisDroneMemory=2607,TardisDroneDisk=102400,TardisDroneUuid=testsite-1390065 pilot.sh"  # noqa: B950
        )

    @mock_executor_run_command(TEST_DEPLOY_RESOURCE_RESPONSE)
    def test_deploy_resource_w_submit_options(self):
        """Configured ``SubmitOptions`` must show up in the ``sbatch`` command."""
        self.test_site_config.MachineTypeConfiguration.test2large.SubmitOptions = (
            AttributeDict(long=AttributeDict(gres="tmp:1G"))
        )

        slurm_adapter = SlurmAdapter(machine_type="test2large", site_name="TestSite")

        run_async(
            slurm_adapter.deploy_resource,
            resource_attributes=AttributeDict(
                machine_type="test2large",
                site_name="TestSite",
                obs_machine_meta_data_translation_mapping=AttributeDict(
                    Cores=1,
                    Memory=1000,
                    Disk=1000,
                ),
                drone_uuid="testsite-1390065",
            ),
        )

        self.mock_executor.return_value.run_command.assert_called_with(
            "sbatch -p normal -N 1 -n 20 -t 60 --gres=tmp:1G --mem=63488mb --export=SLURM_Walltime=60,TardisDroneCores=20,TardisDroneMemory=62000,TardisDroneDisk=100000,TardisDroneUuid=testsite-1390065 pilot.sh"  # noqa: B950
        )

    def test_machine_meta_data(self):
        self.assertEqual(
            self.slurm_adapter.machine_meta_data, self.machine_meta_data["test2large"]
        )

    def test_machine_type(self):
        self.assertEqual(self.slurm_adapter.machine_type, "test2large")

    def test_site_name(self):
        self.assertEqual(self.slurm_adapter.site_name, "TestSite")

    @mock_executor_run_command(TEST_RESOURCE_STATUS_RESPONSE)
    def test_resource_status(self):
        """Check returned attributes and the ``squeue`` call of ``resource_status``."""
        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(updated=datetime.now())

        returned_resource_attributes = run_async(
            self.slurm_adapter.resource_status,
            resource_attributes=self.resource_attributes,
        )

        self.assertLess(
            (
                returned_resource_attributes.updated
                - expected_resource_attributes.updated
            ),
            timedelta(seconds=1),
        )

        self.check_attribute_dicts(
            expected_resource_attributes,
            returned_resource_attributes,
            exclude=("created", "updated"),
        )

        self.mock_executor.return_value.run_command.assert_called_with(
            'squeue -o "%A|%N|%T" -h -t all'
        )

    @mock_executor_run_command(TEST_RESOURCE_STATUS_RESPONSE_RUNNING)
    def test_update_resource_status(self):
        """A booting resource must be reported as running after the update."""
        self.assertEqual(
            self.resource_attributes["resource_status"], ResourceStatus.Booting
        )

        return_resource_attributes = run_async(
            self.slurm_adapter.resource_status,
            resource_attributes=self.resource_attributes,
        )

        self.assertEqual(
            return_resource_attributes["resource_status"], ResourceStatus.Running
        )
        self.assertEqual(
            return_resource_attributes["drone_uuid"],
            self.resource_attributes["drone_uuid"],
        )

        self.mock_executor.return_value.run_command.assert_called_with(
            'squeue -o "%A|%N|%T" -h -t all'
        )

    @mock_executor_run_command(TEST_RESOURCE_STATUS_RESPONSE_ALL_STATES)
    def test_resource_state_translation(self):
        """Check the translation of every Slurm job state to a ``ResourceStatus``.

        Job ids are generated as 1000000, 1001000, ... in the order of the
        mapping below; presumably the mocked ``squeue`` response lists the
        states in the same order - verify against the fixture.
        """
        state_translations = {
            "BOOT_FAIL": ResourceStatus.Error,
            "CANCELLED": ResourceStatus.Deleted,
            "COMPLETED": ResourceStatus.Deleted,
            "CONFIGURING": ResourceStatus.Booting,
            "COMPLETING": ResourceStatus.Running,
            "DEADLINE": ResourceStatus.Error,
            "FAILED": ResourceStatus.Error,
            "NODE_FAIL": ResourceStatus.Error,
            "OUT_OF_MEMORY": ResourceStatus.Error,
            "PENDING": ResourceStatus.Booting,
            "PREEMPTED": ResourceStatus.Deleted,
            "RUNNING": ResourceStatus.Running,
            "RESV_DEL_HOLD": ResourceStatus.Stopped,
            "REQUEUE_FED": ResourceStatus.Booting,
            "REQUEUE_HOLD": ResourceStatus.Booting,
            "REQUEUED": ResourceStatus.Booting,
            "RESIZING": ResourceStatus.Running,
            "REVOKED": ResourceStatus.Error,
            "SIGNALING": ResourceStatus.Running,
            "SPECIAL_EXIT": ResourceStatus.Booting,
            "STAGE_OUT": ResourceStatus.Running,
            "STOPPED": ResourceStatus.Stopped,
            "SUSPENDED": ResourceStatus.Stopped,
            "TIMEOUT": ResourceStatus.Error,
        }

        run_command = self.mock_executor.return_value.run_command
        # Drop calls recorded earlier on the shared mock so that the call
        # count below only reflects this test.
        self.mock_executor.reset_mock()

        for index, expected_status in enumerate(state_translations.values()):
            job_id = int(f"{index + 1000}000")
            returned_resource_attributes = run_async(
                self.slurm_adapter.resource_status,
                AttributeDict(remote_resource_uuid=job_id),
            )
            self.assertEqual(
                returned_resource_attributes.resource_status, expected_status
            )

        # All lookups are expected to be served from a single squeue call.
        run_command.assert_called_once()
        run_command.assert_called_with('squeue -o "%A|%N|%T" -h -t all')

    def test_resource_status_raise_update_failed(self):
        # Update interval is 10 minutes, so turn back last update by 2 minutes
        # and creation date to current date, so that
        # TardisResourceStatusUpdateFailed is raised
        created_timestamp = datetime.now()
        new_timestamp = datetime.now() - timedelta(minutes=2)

        self.slurm_adapter._slurm_status._last_update = new_timestamp

        with self.assertRaises(TardisResourceStatusUpdateFailed):
            run_async(
                self.slurm_adapter.resource_status,
                AttributeDict(
                    remote_resource_uuid=1351043,
                    resource_state=ResourceStatus.Booting,
                    created=created_timestamp,
                ),
            )

    @mock_executor_run_command("")
    def test_resource_status_of_completed_jobs(self):
        # Update interval is 10 minutes, so turn back last update by 11 minutes
        # and creation date to 12 minutes ago. => squeue should be executed
        # The empty string returned by squeue represents a resource is already
        # gone. So, ResourceStatus returned should be Deleted.
        past_timestamp = datetime.now() - timedelta(minutes=12)
        new_timestamp = datetime.now() - timedelta(minutes=11)

        self.slurm_adapter._slurm_status._last_update = new_timestamp

        response = run_async(
            self.slurm_adapter.resource_status,
            AttributeDict(
                resource_id="1390065",
                remote_resource_uuid="1351043",
                created=past_timestamp,
            ),
        )

        self.assertEqual(response.resource_status, ResourceStatus.Deleted)

        self.mock_executor.return_value.run_command.assert_called_with(
            'squeue -o "%A|%N|%T" -h -t all'
        )

    @mock_executor_run_command(
        stdout="",
        raise_exception=CommandExecutionFailure(
            message="Failed", stdout="Failed", stderr="Failed", exit_code=2
        ),
    )
    def test_resource_status_update_failed(self):
        # set previous data, should be returned when update fails
        self.slurm_adapter._slurm_status._data = {
            "1390065": {"JobId": "1390065", "Host": "fh2n1552", "State": "RUNNING"}
        }

        with self.assertLogs(level=logging.WARNING):
            response = run_async(
                self.slurm_adapter.resource_status,
                AttributeDict(remote_resource_uuid="1390065"),
            )

        self.check_attribute_dicts(
            AttributeDict(
                remote_resource_uuid=1390065,
                resource_status=ResourceStatus.Running,
                updated=datetime.now(),
            ),
            response,
            exclude=("updated",),
        )

        self.mock_executor.return_value.run_command.assert_called_with(
            'squeue -o "%A|%N|%T" -h -t all'
        )

    @mock_executor_run_command(stdout="", stderr="", exit_code=0)
    def test_stop_resource(self):
        run_async(
            self.slurm_adapter.stop_resource,
            resource_attributes=self.resource_attributes,
        )

        self.mock_executor.return_value.run_command.assert_called_with(
            "scancel 1390065"
        )

    @mock_executor_run_command(stdout="", stderr="", exit_code=0)
    def test_terminate_resource(self):
        run_async(
            self.slurm_adapter.terminate_resource,
            resource_attributes=self.resource_attributes,
        )

        self.mock_executor.return_value.run_command.assert_called_with(
            "scancel 1390065"
        )

    def test_exception_handling(self):
        """Check the exception translation done by ``handle_exceptions``."""
        matrix = [
            (asyncio.TimeoutError(), TardisTimeout),
            (
                CommandExecutionFailure(
                    message="Test", exit_code=255, stdout="Test", stderr="Test"
                ),
                TardisResourceStatusUpdateFailed,
            ),
            (TardisResourceStatusUpdateFailed, TardisResourceStatusUpdateFailed),
            (Exception, TardisError),
        ]

        for to_raise, to_catch in matrix:
            with self.assertRaises(to_catch):
                with self.assertLogs(level=logging.WARNING):
                    with self.slurm_adapter.handle_exceptions():
                        raise to_raise
def test_get_machine_status(self):
    """Check the machine status mapping and the failure path of the updater.

    The first part asserts the ``MachineStatus`` reported for a set of drone
    uuids. The second part makes the executor raise and expects
    ``htcondor_status_updater`` to log at WARNING level and re-raise the
    ``CommandExecutionFailure``.
    """
    run_command = self.mock_executor.return_value.run_command

    self.assertEqual(
        run_async(self.htcondor_adapter.get_machine_status, drone_uuid="test"),
        MachineStatus.Available,
    )
    run_command.assert_called_with(self.command)
    self.mock_executor.reset_mock()

    expected_states = (
        ("not_exists", MachineStatus.NotAvailable),
        ("test_drain", MachineStatus.Draining),
        ("test_drained", MachineStatus.Drained),
        ("test_owner", MachineStatus.NotAvailable),
        ("test_uuid", MachineStatus.Available),
    )
    for drone_uuid, expected_status in expected_states:
        self.assertEqual(
            run_async(
                self.htcondor_adapter.get_machine_status, drone_uuid=drone_uuid
            ),
            expected_status,
        )
        self.mock_executor.reset_mock()

    attributes = {
        "Machine": "Machine",
        "State": "State",
        "Activity": "Activity",
        "TardisDroneUuid": "TardisDroneUuid",
    }
    # Escape htcondor expressions and add them to attributes
    attributes.update(
        {
            key: quote(value)
            for key, value in self.config.BatchSystem.ratios.items()
        }
    )

    run_command.side_effect = CommandExecutionFailure(
        message="Test", exit_code=123, stderr="Test"
    )
    # Always remove the failure injection again, even if an expectation below
    # is not met, so the executor mock does not keep raising afterwards.
    try:
        with self.assertLogs(level=logging.WARNING):
            with self.assertRaises(CommandExecutionFailure):
                run_async(
                    partial(
                        htcondor_status_updater,
                        self.config.BatchSystem.options,
                        attributes,
                        self.mock_executor.return_value,
                    )
                )
                # NOTE(review): when the call above raises as expected, this
                # assertion is never reached - confirm the intended placement.
                run_command.assert_called_with(self.command)
    finally:
        run_command.side_effect = None
class TestHTCondorSiteAdapter(TestCase):
    """Tests for ``HTCondorAdapter`` with configuration and shell executor patched.

    Both patches are started once per class, so the executor mock is shared by
    all test methods of this class.
    """

    mock_config_patcher = None
    mock_executor_patcher = None

    @classmethod
    def setUpClass(cls):
        cls.mock_config_patcher = patch("tardis.adapters.sites.htcondor.Configuration")
        cls.mock_config = cls.mock_config_patcher.start()
        cls.mock_executor_patcher = patch(
            "tardis.adapters.sites.htcondor.ShellExecutor"
        )
        cls.mock_executor = cls.mock_executor_patcher.start()

    @classmethod
    def tearDownClass(cls):
        cls.mock_config_patcher.stop()
        cls.mock_executor_patcher.stop()

    def setUp(self):
        """Fill the mocked site configuration and build the adapter under test."""
        config = self.mock_config.return_value
        test_site_config = config.TestSite
        test_site_config.MachineMetaData = self.machine_meta_data
        test_site_config.MachineTypeConfiguration = self.machine_type_configuration
        test_site_config.executor = self.mock_executor.return_value
        test_site_config.max_age = 10

        self.adapter = HTCondorAdapter(machine_type="test2large", site_name="TestSite")

    @property
    def machine_meta_data(self):
        return AttributeDict(
            test2large=AttributeDict(Cores=8, Memory=32, Disk=160),
            testunkownresource=AttributeDict(Cores=8, Memory=32, Disk=160, Foo=3),
        )

    @property
    def machine_type_configuration(self):
        return AttributeDict(
            test2large=AttributeDict(jdl="tests/data/submit.jdl"),
            testunkownresource=AttributeDict(jdl="tests/data/submit.jdl"),
        )

    @mock_executor_run_command(stdout=CONDOR_SUBMIT_OUTPUT)
    def test_deploy_resource(self):
        """Check job id, timestamps and the ``condor_submit`` call of a deployment."""
        response = run_async(
            self.adapter.deploy_resource, AttributeDict(drone_uuid="test-123")
        )
        self.assertEqual(response.remote_resource_uuid, "1351043")

        # Both timestamps must lie within one second of "now"; the absolute
        # difference catches deviations in either direction.
        one_second = timedelta(seconds=1)
        self.assertLess(abs(datetime.now() - response.created), one_second)
        self.assertLess(abs(datetime.now() - response.updated), one_second)

        self.mock_executor.return_value.run_command.assert_called_with(
            "condor_submit", stdin_input=CONDOR_SUBMIT_JDL
        )
        self.mock_executor.reset_mock()

    def test_translate_resources_raises_logs(self):
        """A machine type with an unknown resource key must log and raise KeyError."""
        self.adapter = HTCondorAdapter(
            machine_type="testunkownresource", site_name="TestSite"
        )
        with self.assertLogs(logging.getLogger(), logging.ERROR):
            with self.assertRaises(KeyError):
                run_async(
                    self.adapter.deploy_resource, AttributeDict(drone_uuid="test-123")
                )

    def test_machine_meta_data(self):
        self.assertEqual(
            self.adapter.machine_meta_data, self.machine_meta_data.test2large
        )

    def test_machine_type(self):
        self.assertEqual(self.adapter.machine_type, "test2large")

    def test_site_name(self):
        self.assertEqual(self.adapter.site_name, "TestSite")

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_IDLE)
    def test_resource_status_idle(self):
        response = run_async(
            self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043")
        )
        self.assertEqual(response.resource_status, ResourceStatus.Booting)

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_RUN)
    def test_resource_status_run(self):
        response = run_async(
            self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043")
        )
        self.assertEqual(response.resource_status, ResourceStatus.Running)

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_REMOVING)
    def test_resource_status_removing(self):
        response = run_async(
            self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043")
        )
        self.assertEqual(response.resource_status, ResourceStatus.Running)

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_COMPLETED)
    def test_resource_status_completed(self):
        response = run_async(
            self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043")
        )
        self.assertEqual(response.resource_status, ResourceStatus.Deleted)

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_HELD)
    def test_resource_status_held(self):
        response = run_async(
            self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043")
        )
        self.assertEqual(response.resource_status, ResourceStatus.Error)

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_TRANSFERING_OUTPUT)
    def test_resource_status_transfering_output(self):
        response = run_async(
            self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043")
        )
        self.assertEqual(response.resource_status, ResourceStatus.Running)

    # NOTE(review): the method name says "unexpanded" but the fixture is the
    # suspended condor_q output - confirm which name is intended.
    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_SUSPENDED)
    def test_resource_status_unexpanded(self):
        response = run_async(
            self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043")
        )
        self.assertEqual(response.resource_status, ResourceStatus.Stopped)

    @mock_executor_run_command(
        stdout="",
        raise_exception=CommandExecutionFailure(
            message="Failed", stdout="Failed", stderr="Failed", exit_code=2
        ),
    )
    def test_resource_status_raise_future(self):
        """A failing update for a resource created in the future must raise."""
        future_timestamp = datetime.now() + timedelta(minutes=1)
        with self.assertLogs(logging.getLogger(), logging.ERROR):
            with self.assertRaises(TardisResourceStatusUpdateFailed):
                run_async(
                    self.adapter.resource_status,
                    AttributeDict(
                        remote_resource_uuid="1351043", created=future_timestamp
                    ),
                )

    @mock_executor_run_command(
        stdout="",
        raise_exception=CommandExecutionFailure(
            message="Failed", stdout="Failed", stderr="Failed", exit_code=2
        ),
    )
    def test_resource_status_raise_past(self):
        # Update interval is 10 minutes, so set last update back by 11 minutes
        # in order to execute condor_q command and creation date to 12 minutes
        # ago
        past_timestamp = datetime.now() - timedelta(minutes=12)
        self.adapter._htcondor_queue._last_update = datetime.now() - timedelta(
            minutes=11
        )
        with self.assertLogs(logging.getLogger(), logging.ERROR):
            response = run_async(
                self.adapter.resource_status,
                AttributeDict(remote_resource_uuid="1351043", created=past_timestamp),
            )
        self.assertEqual(response.resource_status, ResourceStatus.Deleted)

    @mock_executor_run_command(stdout=CONDOR_SUSPEND_OUTPUT)
    def test_stop_resource(self):
        response = run_async(
            self.adapter.stop_resource, AttributeDict(remote_resource_uuid="1351043")
        )
        self.assertEqual(response.remote_resource_uuid, "1351043")

    @mock_executor_run_command(
        stdout="",
        raise_exception=CommandExecutionFailure(
            message=CONDOR_SUSPEND_FAILED_MESSAGE,
            exit_code=1,
            stderr=CONDOR_SUSPEND_FAILED_OUTPUT,
            stdout="",
            stdin="",
        ),
    )
    def test_stop_resource_failed_redo(self):
        """Exit code 1 on suspend is expected to surface as a status update failure."""
        with self.assertRaises(TardisResourceStatusUpdateFailed):
            run_async(
                self.adapter.stop_resource,
                AttributeDict(remote_resource_uuid="1351043"),
            )

    @mock_executor_run_command(
        stdout="",
        raise_exception=CommandExecutionFailure(
            message=CONDOR_SUSPEND_FAILED_MESSAGE,
            exit_code=2,
            stderr=CONDOR_SUSPEND_FAILED_OUTPUT,
            stdout="",
            stdin="",
        ),
    )
    def test_stop_resource_failed_raise(self):
        """Exit code 2 on suspend is expected to propagate unchanged."""
        with self.assertRaises(CommandExecutionFailure):
            run_async(
                self.adapter.stop_resource,
                AttributeDict(remote_resource_uuid="1351043"),
            )

    @mock_executor_run_command(stdout=CONDOR_RM_OUTPUT)
    def test_terminate_resource(self):
        response = run_async(
            self.adapter.terminate_resource,
            AttributeDict(remote_resource_uuid="1351043"),
        )
        self.assertEqual(response.remote_resource_uuid, "1351043")

    @mock_executor_run_command(
        stdout="",
        raise_exception=CommandExecutionFailure(
            message=CONDOR_RM_FAILED_MESSAGE,
            exit_code=1,
            stderr=CONDOR_RM_FAILED_OUTPUT,
            stdout="",
            stdin="",
        ),
    )
    def test_terminate_resource_failed_redo(self):
        """Exit code 1 on removal is expected to surface as a status update failure."""
        with self.assertRaises(TardisResourceStatusUpdateFailed):
            run_async(
                self.adapter.terminate_resource,
                AttributeDict(remote_resource_uuid="1351043"),
            )

    @mock_executor_run_command(
        stdout="",
        raise_exception=CommandExecutionFailure(
            message=CONDOR_RM_FAILED_MESSAGE,
            exit_code=2,
            stderr=CONDOR_RM_FAILED_OUTPUT,
            stdout="",
            stdin="",
        ),
    )
    def test_terminate_resource_failed_raise(self):
        """Exit code 2 on removal is expected to propagate unchanged."""
        with self.assertRaises(CommandExecutionFailure):
            run_async(
                self.adapter.terminate_resource,
                AttributeDict(remote_resource_uuid="1351043"),
            )

    def test_exception_handling(self):
        """Check the exception translation done by ``handle_exceptions``."""
        matrix = [
            (Exception, TardisError),
            (TardisResourceStatusUpdateFailed, TardisResourceStatusUpdateFailed),
        ]

        for to_raise, to_catch in matrix:
            with self.assertRaises(to_catch):
                with self.adapter.handle_exceptions():
                    raise to_raise
class TestHTCondorSiteAdapter(TestCase):
    """Tests for ``HTCondorAdapter`` with configuration and shell executor patched.

    Both patches are started once per class, so the executor mock is shared by
    all test methods of this class.
    """

    mock_config_patcher = None
    mock_executor_patcher = None

    @classmethod
    def setUpClass(cls):
        cls.mock_config_patcher = patch('tardis.adapters.sites.htcondor.Configuration')
        cls.mock_config = cls.mock_config_patcher.start()
        cls.mock_executor_patcher = patch('tardis.adapters.sites.htcondor.ShellExecutor')
        cls.mock_executor = cls.mock_executor_patcher.start()

    @classmethod
    def tearDownClass(cls):
        cls.mock_config_patcher.stop()
        cls.mock_executor_patcher.stop()

    def setUp(self):
        """Fill the mocked site configuration and build the adapter under test."""
        config = self.mock_config.return_value
        test_site_config = config.TestSite
        test_site_config.MachineMetaData = self.machine_meta_data
        test_site_config.MachineTypeConfiguration = self.machine_type_configuration
        test_site_config.executor = self.mock_executor.return_value
        test_site_config.max_age = 10

        self.adapter = HTCondorAdapter(machine_type='test2large', site_name='TestSite')

    @property
    def machine_meta_data(self):
        return AttributeDict(test2large=AttributeDict(Cores=8, Memory=32),
                             testunkownresource=AttributeDict(Cores=8, Memory=32, Foo=3))

    @property
    def machine_type_configuration(self):
        return AttributeDict(test2large=AttributeDict(jdl='submit.jdl'),
                             testunkownresource=AttributeDict(jdl='submit.jdl'))

    @mock_executor_run_command(stdout=CONDOR_SUBMIT_OUTPUT)
    def test_deploy_resource(self):
        """Check job id, timestamps and the ``condor_submit`` call of a deployment."""
        response = run_async(self.adapter.deploy_resource, AttributeDict(drone_uuid='test-123'))
        self.assertEqual(response.remote_resource_uuid, "1351043")

        # Both timestamps must lie within one second of "now"; the absolute
        # difference catches deviations in either direction.
        one_second = timedelta(seconds=1)
        self.assertLess(abs(datetime.now() - response.created), one_second)
        self.assertLess(abs(datetime.now() - response.updated), one_second)

        self.mock_executor.return_value.run_command.assert_called_with(
            'condor_submit -append "environment = TardisDroneUuid=test-123;TardisDroneCores=8;TardisDroneMemory=32768"'
            ' -a "request_cpus = 8" -a "request_memory = 32768" submit.jdl')
        self.mock_executor.reset_mock()

    def test_translate_resources_raises_logs(self):
        """A machine type with an unknown resource key must log and raise KeyError."""
        self.adapter = HTCondorAdapter(machine_type='testunkownresource', site_name='TestSite')
        with self.assertLogs(logging.getLogger(), logging.ERROR):
            with self.assertRaises(KeyError):
                run_async(self.adapter.deploy_resource, AttributeDict(drone_uuid='test-123'))

    def test_machine_meta_data(self):
        self.assertEqual(self.adapter.machine_meta_data, self.machine_meta_data.test2large)

    def test_machine_type(self):
        self.assertEqual(self.adapter.machine_type, 'test2large')

    def test_site_name(self):
        self.assertEqual(self.adapter.site_name, 'TestSite')

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_UNEXANPANDED)
    def test_resource_status_unexpanded(self):
        response = run_async(self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043"))
        self.assertEqual(response.resource_status, ResourceStatus.Error)

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_IDLE)
    def test_resource_status_idle(self):
        response = run_async(self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043"))
        self.assertEqual(response.resource_status, ResourceStatus.Booting)

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_RUN)
    def test_resource_status_run(self):
        response = run_async(self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043"))
        self.assertEqual(response.resource_status, ResourceStatus.Running)

    # Each status test needs its own method name: a repeated name replaces the
    # earlier definition in the class body and that test would never run.
    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_COMPLETED)
    def test_resource_status_completed(self):
        response = run_async(self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043"))
        self.assertEqual(response.resource_status, ResourceStatus.Stopped)

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_HELD)
    def test_resource_status_held(self):
        response = run_async(self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043"))
        self.assertEqual(response.resource_status, ResourceStatus.Error)

    @mock_executor_run_command(stdout=CONDOR_Q_OUTPUT_SUBMISSION_ERR)
    def test_resource_status_submission_err(self):
        response = run_async(self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043"))
        self.assertEqual(response.resource_status, ResourceStatus.Error)

    @mock_executor_run_command(stdout="", raise_exception=CommandExecutionFailure(message="Failed", stdout="Failed",
                                                                                  stderr="Failed", exit_code=2))
    def test_resource_status_raise_future(self):
        """A failing update for a resource created in the future must raise."""
        future_timestamp = datetime.now() + timedelta(minutes=1)
        with self.assertLogs(logging.getLogger(), logging.ERROR):
            with self.assertRaises(TardisResourceStatusUpdateFailed):
                run_async(self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043",
                                                                      created=future_timestamp))

    @mock_executor_run_command(stdout="", raise_exception=CommandExecutionFailure(message="Failed", stdout="Failed",
                                                                                  stderr="Failed", exit_code=2))
    def test_resource_status_raise_past(self):
        # Update interval is 10 minutes, so set last update back by 11 minutes
        # in order to execute condor_q command and creation date to 12 minutes
        # ago
        past_timestamp = datetime.now() - timedelta(minutes=12)
        self.adapter._htcondor_queue._last_update = datetime.now() - timedelta(minutes=11)
        with self.assertLogs(logging.getLogger(), logging.ERROR):
            response = run_async(self.adapter.resource_status, AttributeDict(remote_resource_uuid="1351043",
                                                                             created=past_timestamp))
        self.assertEqual(response.resource_status, ResourceStatus.Deleted)

    @mock_executor_run_command(stdout=CONDOR_RM_OUTPUT)
    def test_stop_resource(self):
        response = run_async(self.adapter.stop_resource, AttributeDict(remote_resource_uuid="1351043"))
        self.assertEqual(response.remote_resource_uuid, "1351043")

    @mock_executor_run_command(stdout=CONDOR_RM_OUTPUT)
    def test_terminate_resource(self):
        response = run_async(self.adapter.terminate_resource, AttributeDict(remote_resource_uuid="1351043"))
        self.assertEqual(response.remote_resource_uuid, "1351043")

    def test_exception_handling(self):
        """Check the exception translation done by ``handle_exceptions``."""
        matrix = [(Exception, TardisError),
                  (TardisResourceStatusUpdateFailed, TardisResourceStatusUpdateFailed)]

        for to_raise, to_catch in matrix:
            with self.assertRaises(to_catch):
                with self.adapter.handle_exceptions():
                    raise to_raise
async def command_failing_update_function(self):
    """Coroutine that always fails with a ``CommandExecutionFailure`` (exit code 2)."""
    failure = CommandExecutionFailure(
        message='Failure',
        stdout='Failure',
        stderr='Failure',
        exit_code=2,
    )
    raise failure
class TestMoabAdapter(TestCase):
    """Tests for ``MoabAdapter``.

    ``Configuration`` and ``ShellExecutor`` in ``tardis.adapters.sites.moab``
    are patched for the lifetime of the class, so no command is really run.
    """

    @classmethod
    def setUpClass(cls):
        cls.mock_config_patcher = patch(
            'tardis.adapters.sites.moab.Configuration')
        cls.mock_config = cls.mock_config_patcher.start()
        cls.mock_executor_patcher = patch(
            'tardis.adapters.sites.moab.ShellExecutor')
        cls.mock_executor = cls.mock_executor_patcher.start()

    @classmethod
    def tearDownClass(cls):
        cls.mock_config_patcher.stop()
        cls.mock_executor_patcher.stop()

    def setUp(self):
        """Fill the mocked ``TestSite`` configuration and build a fresh adapter."""
        config = self.mock_config.return_value
        self.test_site_config = config.TestSite
        self.test_site_config.MachineMetaData = self.machine_meta_data
        self.test_site_config.StartupCommand = 'startVM.py'
        self.test_site_config.StatusUpdate = 10
        self.test_site_config.MachineTypeConfiguration = self.machine_type_configuration
        self.test_site_config.executor = self.mock_executor.return_value
        self.moab_adapter = MoabAdapter(machine_type='test2large',
                                        site_name='TestSite')

    def tearDown(self):
        pass

    @property
    def machine_meta_data(self):
        """Fresh machine meta data mapping for the ``test2large`` machine type."""
        return AttributeDict(test2large=AttributeDict(Cores=128, Memory='120'))

    @property
    def machine_type_configuration(self):
        """Fresh machine type configuration for the ``test2large`` machine type."""
        return AttributeDict(test2large=AttributeDict(NodeType='1:ppn=20',
                                                      Walltime='02:00:00:00'))

    @property
    def resource_attributes(self):
        """Fresh resource attributes of a booting drone; a new object on every access."""
        return AttributeDict(
            machine_type='test2large',
            site_name='TestSite',
            remote_resource_uuid=4761849,
            resource_status=ResourceStatus.Booting,
            created=datetime.strptime("Wed Jan 23 2019 15:01:47",
                                      '%a %b %d %Y %H:%M:%S'),
            updated=datetime.strptime("Wed Jan 23 2019 15:02:17",
                                      '%a %b %d %Y %H:%M:%S'),
            drone_uuid='testsite-4761849')

    def _assert_within_one_second(self, actual, reference, message):
        """Fail with *message* if *actual* is more than one second after *reference*.

        Reports a proper assertion failure instead of raising a bare
        ``Exception``; the pass condition is the same as before.
        """
        self.assertLessEqual(actual - reference, timedelta(seconds=1), message)

    @mock_executor_run_command(TEST_DEPLOY_RESOURCE_RESPONSE)
    def test_deploy_resource(self):
        """Deploying returns the expected attributes and issues the ``msub`` command."""
        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(created=datetime.now(),
                                            updated=datetime.now())
        return_resource_attributes = run_async(
            self.moab_adapter.deploy_resource,
            resource_attributes=AttributeDict(machine_type='test2large',
                                              site_name='TestSite'))
        self._assert_within_one_second(
            return_resource_attributes.created,
            expected_resource_attributes.created,
            "Creation time or update time wrong!")
        self._assert_within_one_second(
            return_resource_attributes.updated,
            expected_resource_attributes.updated,
            "Creation time or update time wrong!")
        # Timestamps were checked with a tolerance above; drop them so the
        # remaining attributes can be compared exactly.
        del expected_resource_attributes.created, expected_resource_attributes.updated, \
            return_resource_attributes.created, return_resource_attributes.updated
        self.assertEqual(return_resource_attributes,
                         expected_resource_attributes)
        self.mock_executor.return_value.run_command.assert_called_with(
            'msub -j oe -m p -l walltime=02:00:00:00,mem=120gb,nodes=1:ppn=20 startVM.py'
        )

    def test_machine_meta_data(self):
        self.assertEqual(self.moab_adapter.machine_meta_data,
                         self.machine_meta_data['test2large'])

    def test_machine_type(self):
        self.assertEqual(self.moab_adapter.machine_type, 'test2large')

    def test_site_name(self):
        self.assertEqual(self.moab_adapter.site_name, 'TestSite')

    @mock_executor_run_command(TEST_RESOURCE_STATUS_RESPONSE)
    def test_resource_status(self):
        """A status query returns the expected attributes with a fresh ``updated``."""
        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(updated=datetime.now())
        return_resource_attributes = run_async(
            self.moab_adapter.resource_status,
            resource_attributes=self.resource_attributes)
        self._assert_within_one_second(return_resource_attributes.updated,
                                       expected_resource_attributes.updated,
                                       "Update time wrong!")
        del expected_resource_attributes.updated, return_resource_attributes.updated
        self.assertEqual(return_resource_attributes,
                         expected_resource_attributes)

    @mock_executor_run_command(TEST_RESOURCE_STATUS_RESPONSE_RUNNING)
    def test_resource_status_update(self):
        """A booting resource is reported as running for the mocked response."""
        self.assertEqual(self.resource_attributes["resource_status"],
                         ResourceStatus.Booting)
        return_resource_attributes = run_async(
            self.moab_adapter.resource_status,
            resource_attributes=self.resource_attributes)
        self.assertEqual(return_resource_attributes["resource_status"],
                         ResourceStatus.Running)

    @mock_executor_run_command(TEST_TERMINATE_RESOURCE_RESPONSE)
    def test_stop_resource(self):
        """Stopping a resource yields ``ResourceStatus.Stopped`` and a fresh ``updated``."""
        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(
            updated=datetime.now(), resource_status=ResourceStatus.Stopped)
        return_resource_attributes = run_async(
            self.moab_adapter.stop_resource,
            resource_attributes=self.resource_attributes)
        self._assert_within_one_second(return_resource_attributes.updated,
                                       expected_resource_attributes.updated,
                                       "Update time wrong!")
        del expected_resource_attributes.updated, return_resource_attributes.updated
        self.assertEqual(return_resource_attributes,
                         expected_resource_attributes)

    @mock_executor_run_command(TEST_TERMINATE_RESOURCE_RESPONSE)
    def test_terminate_resource(self):
        """Terminating a resource yields ``ResourceStatus.Stopped`` and a fresh ``updated``."""
        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(
            updated=datetime.now(), resource_status=ResourceStatus.Stopped)
        return_resource_attributes = run_async(
            self.moab_adapter.terminate_resource,
            resource_attributes=self.resource_attributes)
        self._assert_within_one_second(return_resource_attributes.updated,
                                       expected_resource_attributes.updated,
                                       "Update time wrong!")
        del expected_resource_attributes.updated, return_resource_attributes.updated
        self.assertEqual(return_resource_attributes,
                         expected_resource_attributes)

    @mock_executor_run_command(
        "",
        stderr=TEST_TERMINATE_DEAD_RESOURCE_RESPONSE,
        exit_code=1,
        raise_exception=CommandExecutionFailure(
            message='Test',
            stdout="",
            stderr=TEST_TERMINATE_DEAD_RESOURCE_RESPONSE,
            exit_code=1))
    def test_terminate_dead_resource(self):
        """A command failure with exit code 1 still reports the resource as stopped."""
        return_resource_attributes = run_async(
            self.moab_adapter.terminate_resource,
            resource_attributes=self.resource_attributes)
        self.assertEqual(return_resource_attributes["resource_status"],
                         ResourceStatus.Stopped)

    @mock_executor_run_command("",
                               exit_code=2,
                               raise_exception=CommandExecutionFailure(
                                   message='Test',
                                   stdout="",
                                   stderr="",
                                   exit_code=2))
    def test_terminate_resource_error(self):
        """A command failure with exit code 2 is passed on to the caller."""
        with self.assertRaises(CommandExecutionFailure):
            run_async(self.moab_adapter.terminate_resource,
                      resource_attributes=self.resource_attributes)

    def test_resource_status_raise(self):
        """A status lookup for a freshly created, unknown resource must raise."""
        # The cached status was last refreshed 2 minutes ago and the resource
        # was created just now.
        # NOTE(review): the previous comment mentioned the Slurm "sacct"
        # command here, presumably a copy-paste leftover -- confirm.
        created_timestamp = datetime.now()
        new_timestamp = datetime.now() - timedelta(minutes=2)
        self.moab_adapter._moab_status._last_update = new_timestamp
        with self.assertRaises(TardisResourceStatusUpdateFailed):
            run_async(
                self.moab_adapter.resource_status,
                AttributeDict(resource_id=1351043,
                              remote_resource_uuid=1351043,
                              resource_state=ResourceStatus.Booting,
                              created=created_timestamp))

    def test_resource_status_raise_past(self):
        """A status lookup for an old, unknown resource reports it as stopped."""
        # Last refresh 11 minutes ago (``StatusUpdate`` is configured as 10 in
        # setUp, presumably minutes) and creation date 12 minutes ago.
        past_timestamp = datetime.now() - timedelta(minutes=12)
        new_timestamp = datetime.now() - timedelta(minutes=11)
        self.moab_adapter._moab_status._last_update = new_timestamp
        response = run_async(
            self.moab_adapter.resource_status,
            AttributeDict(resource_id=1390065,
                          remote_resource_uuid=1351043,
                          created=past_timestamp))
        self.assertEqual(response.resource_status, ResourceStatus.Stopped)

    def test_exception_handling(self):
        """Check which exception type ``handle_exceptions`` surfaces for each input."""
        # (exception raised inside the context manager, exception expected outside)
        matrix = [(asyncio.TimeoutError(), TardisTimeout),
                  (asyncssh.Error(code=255, reason="Test", lang="Test"),
                   TardisResourceStatusUpdateFailed),
                  (IndexError, TardisResourceStatusUpdateFailed),
                  (TardisResourceStatusUpdateFailed,
                   TardisResourceStatusUpdateFailed),
                  (CommandExecutionFailure(message="Run test command",
                                           exit_code=1,
                                           stdout="Test",
                                           stderr="Test"),
                   TardisResourceStatusUpdateFailed),
                  (Exception, TardisError)]
        for to_raise, to_catch in matrix:
            # subTest reports every failing pair instead of stopping at the first.
            with self.subTest(to_raise=to_raise, to_catch=to_catch):
                with self.assertRaises(to_catch):
                    with self.moab_adapter.handle_exceptions():
                        raise to_raise

    def test_check_remote_resource_uuid(self):
        """A response that does not match the resource uuid must raise ``TardisError``."""
        with self.assertRaises(TardisError):
            self.moab_adapter.check_remote_resource_uuid(
                AttributeDict(remote_resource_uuid=1),
                regex=r"^(\d)$",
                response="2")
class TestMoabAdapter(TestCase):
    """Tests for ``MoabAdapter``.

    ``Configuration`` and ``ShellExecutor`` in ``tardis.adapters.sites.moab``
    are patched for the lifetime of the class, so no command is really run.
    """

    @classmethod
    def setUpClass(cls):
        cls.mock_config_patcher = patch(
            "tardis.adapters.sites.moab.Configuration")
        cls.mock_config = cls.mock_config_patcher.start()
        cls.mock_executor_patcher = patch(
            "tardis.adapters.sites.moab.ShellExecutor")
        cls.mock_executor = cls.mock_executor_patcher.start()

    @classmethod
    def tearDownClass(cls):
        cls.mock_config_patcher.stop()
        cls.mock_executor_patcher.stop()

    def setUp(self):
        """Fill the mocked ``TestSite`` configuration and build a fresh adapter."""
        config = self.mock_config.return_value
        # Restrict the site mock to the listed attributes so that reading any
        # other attribute (e.g. a site-level StartupCommand) raises
        # AttributeError.
        config.TestSite = MagicMock(spec=[
            "MachineMetaData",
            "StatusUpdate",
            "MachineTypeConfiguration",
            "executor",
        ])
        self.test_site_config = config.TestSite
        self.test_site_config.MachineMetaData = self.machine_meta_data
        self.test_site_config.StatusUpdate = 10
        self.test_site_config.MachineTypeConfiguration = self.machine_type_configuration
        self.test_site_config.executor = self.mock_executor.return_value
        self.moab_adapter = MoabAdapter(machine_type="test2large",
                                        site_name="TestSite")

    def tearDown(self):
        pass

    @property
    def machine_meta_data(self):
        """Fresh machine meta data mapping for the ``test2large`` machine type."""
        return AttributeDict(test2large=AttributeDict(Cores=128, Memory="120"))

    @property
    def machine_type_configuration(self):
        """Fresh machine type configuration for the ``test2large`` machine type."""
        return AttributeDict(
            test2large=AttributeDict(NodeType="1:ppn=20",
                                     StartupCommand="startVM.py",
                                     Walltime="02:00:00:00"))

    @property
    def resource_attributes(self):
        """Fresh resource attributes of a booting drone; a new object on every access."""
        return AttributeDict(
            machine_type="test2large",
            site_name="TestSite",
            remote_resource_uuid=4761849,
            resource_status=ResourceStatus.Booting,
            created=datetime.strptime("Wed Jan 23 2019 15:01:47",
                                      "%a %b %d %Y %H:%M:%S"),
            updated=datetime.strptime("Wed Jan 23 2019 15:02:17",
                                      "%a %b %d %Y %H:%M:%S"),
            drone_uuid="testsite-4761849",
        )

    def _assert_within_one_second(self, actual, reference, message):
        """Fail with *message* if *actual* is more than one second after *reference*.

        Reports a proper assertion failure instead of raising a bare
        ``Exception``; the pass condition is the same as before.
        """
        self.assertLessEqual(actual - reference, timedelta(seconds=1), message)

    def test_start_up_command_deprecation_warning(self):
        """Missing ``StartupCommand`` raises; a site-level one triggers a deprecation warning."""
        # Necessary to avoid annoying message in PyCharm
        filterwarnings(action="ignore",
                       message="unclosed",
                       category=ResourceWarning)
        del self.test_site_config.MachineTypeConfiguration.test2large.StartupCommand

        with self.assertRaises(AttributeError):
            self.moab_adapter = MoabAdapter(machine_type="test2large",
                                            site_name="TestSite")

        self.test_site_config.StartupCommand = "startVM.py"

        with self.assertWarns(DeprecationWarning):
            self.moab_adapter = MoabAdapter(machine_type="test2large",
                                            site_name="TestSite")

    @mock_executor_run_command(TEST_DEPLOY_RESOURCE_RESPONSE)
    def test_deploy_resource(self):
        """Deploying returns the expected attributes and issues the ``msub`` command."""
        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(created=datetime.now(),
                                            updated=datetime.now())
        return_resource_attributes = run_async(
            self.moab_adapter.deploy_resource,
            resource_attributes=AttributeDict(machine_type="test2large",
                                              site_name="TestSite"),
        )
        self._assert_within_one_second(
            return_resource_attributes.created,
            expected_resource_attributes.created,
            "Creation time or update time wrong!",
        )
        self._assert_within_one_second(
            return_resource_attributes.updated,
            expected_resource_attributes.updated,
            "Creation time or update time wrong!",
        )
        # Timestamps were checked with a tolerance above; drop them so the
        # remaining attributes can be compared exactly.
        del (
            expected_resource_attributes.created,
            expected_resource_attributes.updated,
            return_resource_attributes.created,
            return_resource_attributes.updated,
        )
        self.assertEqual(return_resource_attributes,
                         expected_resource_attributes)
        self.mock_executor.return_value.run_command.assert_called_with(
            "msub -j oe -m p -l walltime=02:00:00:00,mem=120gb,nodes=1:ppn=20 startVM.py"
        )

    def test_machine_meta_data(self):
        self.assertEqual(self.moab_adapter.machine_meta_data,
                         self.machine_meta_data["test2large"])

    def test_machine_type(self):
        self.assertEqual(self.moab_adapter.machine_type, "test2large")

    def test_site_name(self):
        self.assertEqual(self.moab_adapter.site_name, "TestSite")

    @mock_executor_run_command(TEST_RESOURCE_STATUS_RESPONSE)
    def test_resource_status(self):
        """A status query returns the expected attributes with a fresh ``updated``."""
        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(updated=datetime.now())
        return_resource_attributes = run_async(
            self.moab_adapter.resource_status,
            resource_attributes=self.resource_attributes,
        )
        self._assert_within_one_second(
            return_resource_attributes.updated,
            expected_resource_attributes.updated,
            "Update time wrong!",
        )
        del expected_resource_attributes.updated, return_resource_attributes.updated
        self.assertEqual(return_resource_attributes,
                         expected_resource_attributes)

    @mock_executor_run_command(TEST_RESOURCE_STATE_TRANSLATION_RESPONSE)
    def test_resource_state_translation(self):
        """Every entry of ``STATE_TRANSLATIONS`` maps to its expected resource status."""
        for num, (_, state) in enumerate(STATE_TRANSLATIONS):
            job_id = f"76242{num:02}"
            # subTest reports every failing job id instead of stopping at the first.
            with self.subTest(job_id=job_id):
                return_resource_attributes = run_async(
                    self.moab_adapter.resource_status,
                    AttributeDict(remote_resource_uuid=job_id),
                )
                self.assertEqual(return_resource_attributes.resource_status,
                                 state)

        self.mock_executor.return_value.run_command.assert_called_with(
            "showq --xml -w user=$(whoami) && showq -c --xml -w user=$(whoami)"
        )

    @mock_executor_run_command(TEST_RESOURCE_STATUS_RESPONSE_RUNNING)
    def test_resource_status_update(self):
        """A booting resource is reported as running for the mocked response."""
        self.assertEqual(self.resource_attributes["resource_status"],
                         ResourceStatus.Booting)
        return_resource_attributes = run_async(
            self.moab_adapter.resource_status,
            resource_attributes=self.resource_attributes,
        )
        self.assertEqual(return_resource_attributes["resource_status"],
                         ResourceStatus.Running)

    @mock_executor_run_command(TEST_TERMINATE_RESOURCE_RESPONSE)
    def test_stop_resource(self):
        """Stopping a resource yields ``ResourceStatus.Stopped`` and a fresh ``updated``."""
        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(
            updated=datetime.now(), resource_status=ResourceStatus.Stopped)
        return_resource_attributes = run_async(
            self.moab_adapter.stop_resource,
            resource_attributes=self.resource_attributes,
        )
        self._assert_within_one_second(
            return_resource_attributes.updated,
            expected_resource_attributes.updated,
            "Update time wrong!",
        )
        del expected_resource_attributes.updated, return_resource_attributes.updated
        self.assertEqual(return_resource_attributes,
                         expected_resource_attributes)

    @mock_executor_run_command(TEST_TERMINATE_RESOURCE_RESPONSE)
    def test_terminate_resource(self):
        """Terminating a resource yields ``ResourceStatus.Stopped`` and a fresh ``updated``."""
        expected_resource_attributes = self.resource_attributes
        expected_resource_attributes.update(
            updated=datetime.now(), resource_status=ResourceStatus.Stopped)
        return_resource_attributes = run_async(
            self.moab_adapter.terminate_resource,
            resource_attributes=self.resource_attributes,
        )
        self._assert_within_one_second(
            return_resource_attributes.updated,
            expected_resource_attributes.updated,
            "Update time wrong!",
        )
        del expected_resource_attributes.updated, return_resource_attributes.updated
        self.assertEqual(return_resource_attributes,
                         expected_resource_attributes)

    @mock_executor_run_command(
        "",
        stderr=TEST_TERMINATE_DEAD_RESOURCE_RESPONSE,
        exit_code=1,
        raise_exception=CommandExecutionFailure(
            message="Test",
            stdout="",
            stderr=TEST_TERMINATE_DEAD_RESOURCE_RESPONSE,
            exit_code=1,
        ),
    )
    def test_terminate_dead_resource(self):
        """A command failure with exit code 1 still reports the resource as stopped."""
        return_resource_attributes = run_async(
            self.moab_adapter.terminate_resource,
            resource_attributes=self.resource_attributes,
        )
        self.assertEqual(return_resource_attributes["resource_status"],
                         ResourceStatus.Stopped)

    @mock_executor_run_command(
        "",
        exit_code=2,
        raise_exception=CommandExecutionFailure(message="Test",
                                                stdout="",
                                                stderr="",
                                                exit_code=2),
    )
    def test_terminate_resource_error(self):
        """A command failure with exit code 2 is passed on to the caller."""
        with self.assertRaises(CommandExecutionFailure):
            run_async(
                self.moab_adapter.terminate_resource,
                resource_attributes=self.resource_attributes,
            )

    def test_resource_status_raise(self):
        """A status lookup for a freshly created, unknown resource must raise."""
        # The cached status was last refreshed 2 minutes ago and the resource
        # was created just now.
        # NOTE(review): the previous comment mentioned the Slurm "sacct"
        # command here, presumably a copy-paste leftover -- confirm.
        created_timestamp = datetime.now()
        new_timestamp = datetime.now() - timedelta(minutes=2)
        self.moab_adapter._moab_status._last_update = new_timestamp
        with self.assertRaises(TardisResourceStatusUpdateFailed):
            run_async(
                self.moab_adapter.resource_status,
                AttributeDict(
                    resource_id=1351043,
                    remote_resource_uuid=1351043,
                    resource_state=ResourceStatus.Booting,
                    created=created_timestamp,
                ),
            )

    def test_resource_status_raise_past(self):
        """A status lookup for an old, unknown resource reports it as deleted."""
        # Last refresh 11 minutes ago (``StatusUpdate`` is configured as 10 in
        # setUp, presumably minutes) and creation date 12 minutes ago.
        creation_timestamp = datetime.now() - timedelta(minutes=12)
        last_update_timestamp = datetime.now() - timedelta(minutes=11)
        self.moab_adapter._moab_status._last_update = last_update_timestamp
        response = run_async(
            self.moab_adapter.resource_status,
            AttributeDict(
                resource_id=1390065,
                remote_resource_uuid=1351043,
                created=creation_timestamp,
            ),
        )
        self.assertEqual(response.resource_status, ResourceStatus.Deleted)

    def test_exception_handling(self):
        """Check which exception type ``handle_exceptions`` surfaces for each input."""
        # (exception raised inside the context manager, exception expected outside)
        matrix = [
            (asyncio.TimeoutError(), TardisTimeout),
            (
                asyncssh.Error(code=255, reason="Test", lang="Test"),
                TardisResourceStatusUpdateFailed,
            ),
            (IndexError, TardisResourceStatusUpdateFailed),
            (TardisResourceStatusUpdateFailed,
             TardisResourceStatusUpdateFailed),
            (
                CommandExecutionFailure(
                    message="Run test command",
                    exit_code=1,
                    stdout="Test",
                    stderr="Test",
                ),
                TardisResourceStatusUpdateFailed,
            ),
            (Exception, TardisError),
        ]
        for to_raise, to_catch in matrix:
            # subTest reports every failing pair instead of stopping at the first.
            with self.subTest(to_raise=to_raise, to_catch=to_catch):
                with self.assertRaises(to_catch):
                    with self.moab_adapter.handle_exceptions():
                        raise to_raise

    def test_check_remote_resource_uuid(self):
        """A response that does not match the resource uuid must raise ``TardisError``."""
        with self.assertRaises(TardisError):
            self.moab_adapter.check_remote_resource_uuid(
                AttributeDict(remote_resource_uuid=1),
                regex=r"^(\d)$",
                response="2")
class TestSlurmAdapter(TestCase):
    """Tests for ``SlurmAdapter``.

    ``Configuration`` and ``ShellExecutor`` in
    ``tardis.adapters.batchsystems.slurm`` are patched for the lifetime of
    the class.
    """

    mock_config_patcher = None
    mock_executor_patcher = None

    @classmethod
    def setUpClass(cls):
        cls.mock_config_patcher = patch(
            "tardis.adapters.batchsystems.slurm.Configuration"
        )
        cls.mock_config = cls.mock_config_patcher.start()

        cls.mock_executor_patcher = patch(
            "tardis.adapters.batchsystems.slurm.ShellExecutor"
        )
        cls.mock_executor = cls.mock_executor_patcher.start()

    @classmethod
    def tearDownClass(cls):
        cls.mock_config_patcher.stop()
        cls.mock_executor_patcher.stop()

    def setUp(self):
        """Store expected ratios and commands, then build an adapter with a partition option."""
        self.cpu_ratio = 0.5
        self.memory_ratio = 0.25

        # Expected sinfo invocations with and without the partition option.
        self.command = 'sinfo --Format="statelong,cpusstate,allocmem,memory,features,nodehost" -e --noheader -r --partition=test_part'  # noqa B950
        self.command_wo_options = 'sinfo --Format="statelong,cpusstate,allocmem,memory,features,nodehost" -e --noheader -r'  # noqa B950

        partition_options = AttributeDict({"long": {"partition": "test_part"}})
        self.setup_config_mock(options=partition_options)

        self.slurm_adapter = SlurmAdapter()

    def tearDown(self):
        self.mock_executor.reset_mock()

    def setup_config_mock(self, options=None):
        """Populate the mocked ``BatchSystem`` configuration.

        :param options: value stored as ``BatchSystem.options``; any falsy
            value is replaced by an empty dict.
        """
        self.config = self.mock_config.return_value
        self.config.BatchSystem.max_age = 10
        self.config.BatchSystem.executor = self.mock_executor.return_value
        self.config.BatchSystem.options = options if options else {}

    def test_disintegrate_machine(self):
        outcome = run_async(self.slurm_adapter.disintegrate_machine, drone_uuid="test")
        self.assertIsNone(outcome)

    @mock_executor_run_command(stdout=SINFO_RETURN)
    def test_drain_machine(self):
        """Draining a known drone issues ``scontrol``; an unknown drone yields ``None``."""
        run_async(self.slurm_adapter.drain_machine, drone_uuid="VM-1")
        self.mock_executor.return_value.run_command.assert_called_with(
            "scontrol update NodeName=host-10-18-1-1 State=DRAIN Reason='COBalD/TARDIS'"
        )
        self.mock_executor.reset_mock()

        unknown_outcome = run_async(
            self.slurm_adapter.drain_machine, drone_uuid="not_exists"
        )
        self.assertIsNone(unknown_outcome)

    @mock_executor_run_command(
        stdout="",
        raise_exception=CommandExecutionFailure(
            message="Failed", stdout="Failed", stderr="Failed", exit_code=2
        ),
    )
    def test_update_exception(self):
        """A failing status update logs at WARNING or above and returns ``None``."""
        with self.assertLogs(level=logging.WARNING):
            outcome = run_async(self.slurm_adapter._slurm_status.update_status)
            self.assertIsNone(outcome)

    @mock_executor_run_command(stdout=SINFO_RETURN)
    def test_drain_machine_without_options(self):
        """Draining issues the same ``scontrol`` command when no options are configured."""
        self.setup_config_mock()
        self.slurm_adapter = SlurmAdapter()

        run_async(self.slurm_adapter.drain_machine, drone_uuid="VM-1")
        self.mock_executor.return_value.run_command.assert_called_with(
            "scontrol update NodeName=host-10-18-1-1 State=DRAIN Reason='COBalD/TARDIS'"
        )
        self.mock_executor.reset_mock()

    def test_integrate_machine(self):
        outcome = run_async(self.slurm_adapter.integrate_machine, drone_uuid="VM-1")
        self.assertIsNone(outcome)

    @mock_executor_run_command(stdout=SINFO_RETURN)
    def test_get_resource_ratios(self):
        """Known drones yield the CPU and memory ratios; unknown drones yield ``{}``."""
        known_ratios = run_async(
            self.slurm_adapter.get_resource_ratios, drone_uuid="VM-1"
        )
        self.assertEqual(list(known_ratios), [self.cpu_ratio, self.memory_ratio])
        self.mock_executor.return_value.run_command.assert_called_with(self.command)
        self.mock_executor.reset_mock()

        unknown_ratios = run_async(
            self.slurm_adapter.get_resource_ratios, drone_uuid="not_exists"
        )
        self.assertEqual(unknown_ratios, {})

    @mock_executor_run_command(stdout=SINFO_RETURN)
    def test_get_resource_ratios_without_options(self):
        """With ``BatchSystem.options`` deleted, ``sinfo`` is called without extra options."""
        self.setup_config_mock()
        del self.config.BatchSystem.options
        self.slurm_adapter = SlurmAdapter()

        ratios = run_async(self.slurm_adapter.get_resource_ratios, drone_uuid="VM-1")
        self.assertEqual(list(ratios), [self.cpu_ratio, self.memory_ratio])

        self.mock_executor.return_value.run_command.assert_called_with(
            self.command_wo_options
        )

    @mock_executor_run_command(stdout=SINFO_RETURN)
    def test_get_allocation(self):
        """Allocation is the larger of the two ratios; unknown drones yield ``0.0``."""
        known_allocation = run_async(
            self.slurm_adapter.get_allocation, drone_uuid="VM-1"
        )
        self.assertEqual(known_allocation, max(self.cpu_ratio, self.memory_ratio))
        self.mock_executor.return_value.run_command.assert_called_with(self.command)

        unknown_allocation = run_async(
            self.slurm_adapter.get_allocation, drone_uuid="not_exists"
        )
        self.assertEqual(unknown_allocation, 0.0)

    @mock_executor_run_command(stdout=SINFO_RETURN)
    def test_get_machine_status(self):
        """Check the machine status per drone and the failure path of the status updater."""
        expected_states = {
            "VM-1": MachineStatus.Available,
            "not_exists": MachineStatus.NotAvailable,
            "draining_m": MachineStatus.Draining,
            "idle_m": MachineStatus.Available,
            "drained_m": MachineStatus.NotAvailable,
            "pwr_up_m": MachineStatus.NotAvailable,
        }
        for drone, expected in expected_states.items():
            reported = run_async(
                self.slurm_adapter.get_machine_status, drone_uuid=drone
            )
            self.assertEqual(reported, expected)

        self.mock_executor.reset_mock()

        # From here on every command execution fails.
        self.mock_executor.return_value.run_command.side_effect = (
            CommandExecutionFailure(message="Test", exit_code=123, stderr="Test")
        )

        attributes = {
            "Machine": "Machine",
            "State": "State",
            "Activity": "Activity",
            "TardisDroneUuid": "TardisDroneUuid",
        }
        with self.assertLogs(level="WARN"):
            with self.assertRaises(CommandExecutionFailure):
                failing_update = partial(
                    slurm_status_updater,
                    self.config.BatchSystem.options,
                    attributes,
                    self.mock_executor.return_value,
                )
                run_async(failing_update)
                # NOTE(review): if run_async raises as expected, the
                # assertion below is never reached -- confirm whether it is
                # meant to sit outside the assertRaises block.
                self.mock_executor.return_value.run_command.assert_called_with(
                    self.command
                )
            # Undo the forced failure on the shared executor mock.
            self.mock_executor.return_value.run_command.side_effect = None

    @mock_executor_run_command(stdout=SINFO_RETURN)
    def test_get_utilisation(self):
        """Utilisation is the smaller of the two ratios; unknown drones yield ``0.0``."""
        known_utilisation = run_async(
            self.slurm_adapter.get_utilisation, drone_uuid="VM-1"
        )
        self.assertEqual(known_utilisation, min(self.cpu_ratio, self.memory_ratio))
        self.mock_executor.return_value.run_command.assert_called_with(self.command)

        unknown_utilisation = run_async(
            self.slurm_adapter.get_utilisation, drone_uuid="not_exists"
        )
        self.assertEqual(unknown_utilisation, 0.0)

    def test_machine_meta_data_translation(self):
        expected_mapping = AttributeDict(Cores=1, Memory=1000, Disk=1000)
        self.assertEqual(
            expected_mapping,
            self.slurm_adapter.machine_meta_data_translation_mapping,
        )