class RayTrialExecutorTest(unittest.TestCase):
    """Start/stop and save/restore checks against a ``RayTrialExecutor``.

    NOTE(review): several classes named ``RayTrialExecutorTest`` are visible
    in this file; if they really share one module, only the last definition
    is reachable by name -- confirm which one is intended.
    """

    def setUp(self):
        # Order matters here: the executor is constructed first, then Ray
        # is started.
        self.trial_executor = RayTrialExecutor(queue_trials=False)
        ray.init()

    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects

    def _get_trials(self):
        """Return, as a list, the trials generated from a PPO grid-search spec."""
        grid_spec = {
            "run": "PPO",
            "config": {
                "bar": {"grid_search": [True, False]},
                "foo": {"grid_search": [1, 2, 3]},
            },
        }
        return list(self.generate_trials(grid_spec, "grid_search"))

    def testStartStop(self):
        """Starting one trial leaves exactly one running trial."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(1, len(executor.get_running_trials()))
        executor.stop_trial(fake_trial)

    def testSaveRestore(self):
        """A running trial can be saved to disk, restored and then stopped."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.save(fake_trial, Checkpoint.DISK)
        executor.restore(fake_trial)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def generate_trials(self, spec, name):
        """Return ``next_trials()`` of a generator built from ``{name: spec}``."""
        experiments = {name: spec}
        return BasicVariantGenerator(experiments).next_trials()
class RayTrialExecutorTest(unittest.TestCase):
    """Drives a ``RayTrialExecutor`` one trial at a time through its events.

    The ``_simulate_*`` helpers pull events from
    ``get_next_executor_event`` and apply them to the trial, standing in for
    the trial runner.

    NOTE(review): several classes named ``RayTrialExecutorTest`` are visible
    in this file; if they really share one module, only the last definition
    is reachable by name -- confirm which one is intended.
    """

    def setUp(self):
        self.trial_executor = RayTrialExecutor()
        ray.init(num_cpus=2, ignore_reinit_error=True)
        _register_all()  # Needed for flaky tests

    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects

    def _set_env(self, name, value):
        """Set ``os.environ[name]`` and restore the prior state after the test.

        Args:
            name: Environment variable to set.
            value: String value to assign.
        """
        previous = os.environ.get(name)

        def _restore():
            if previous is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = previous

        # Cleanups run even when the test body fails, so the variable never
        # leaks into later tests.
        self.addCleanup(_restore)
        os.environ[name] = value

    def _simulate_starting_trial(self, trial):
        """Wait for a PG_READY event, then start ``trial`` and expect RUNNING."""
        future_result = self.trial_executor.get_next_executor_event(
            live_trials={trial}, next_trial_exists=True)
        # unittest assertion rather than a bare ``assert`` so the check
        # survives ``python -O`` and reports both values on failure.
        self.assertEqual(ExecutorEventType.PG_READY, future_result.type)
        self.assertTrue(self.trial_executor.start_trial(trial))
        self.assertEqual(Trial.RUNNING, trial.status)

    def _simulate_getting_result(self, trial):
        """Poll events until a TRAINING_RESULT arrives and apply it to ``trial``."""
        while True:
            future_result = self.trial_executor.get_next_executor_event(
                live_trials={trial}, next_trial_exists=False)
            if future_result.type == ExecutorEventType.TRAINING_RESULT:
                break
        # The event may carry either a single result or a list of results.
        if isinstance(future_result.result, list):
            for r in future_result.result:
                trial.update_last_result(r)
        else:
            trial.update_last_result(future_result.result)

    def _simulate_saving(self, trial):
        """Request a persistent checkpoint and process the SAVING_RESULT event."""
        checkpoint = self.trial_executor.save(trial, Checkpoint.PERSISTENT)
        self.assertEqual(checkpoint, trial.saving_to)
        # The checkpoint value must not be set before the save is processed.
        self.assertIsNone(trial.checkpoint.value)
        future_result = self.trial_executor.get_next_executor_event(
            live_trials={trial}, next_trial_exists=False)
        self.assertEqual(ExecutorEventType.SAVING_RESULT, future_result.type)
        self.process_trial_save(trial, future_result.result)
        self.assertEqual(checkpoint, trial.checkpoint)

    def testStartStop(self):
        """A trial can be started and then stopped."""
        trial = Trial("__fake")
        self._simulate_starting_trial(trial)
        self.trial_executor.stop_trial(trial)

    def testAsyncSave(self):
        """Tests that saved checkpoint value not immediately set."""
        trial = Trial("__fake")
        self._simulate_starting_trial(trial)
        self._simulate_getting_result(trial)
        self._simulate_saving(trial)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testSaveRestore(self):
        """A saved trial can be restored and then terminated."""
        trial = Trial("__fake")
        self._simulate_starting_trial(trial)
        self._simulate_getting_result(trial)
        self._simulate_saving(trial)
        self.trial_executor.restore(trial)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testPauseResume(self):
        """Tests that pausing works for trials in flight."""
        trial = Trial("__fake")
        self._simulate_starting_trial(trial)
        self.trial_executor.pause_trial(trial)
        self.assertEqual(Trial.PAUSED, trial.status)
        self._simulate_starting_trial(trial)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testSavePauseResumeErrorRestore(self):
        """Tests that pause checkpoint does not replace restore checkpoint."""
        trial = Trial("__fake")
        self._simulate_starting_trial(trial)
        self._simulate_getting_result(trial)
        # Save
        self._simulate_saving(trial)
        # Train
        self.trial_executor.continue_training(trial)
        self._simulate_getting_result(trial)
        # Pause
        self.trial_executor.pause_trial(trial)
        self.assertEqual(Trial.PAUSED, trial.status)
        self.assertEqual(trial.checkpoint.storage, Checkpoint.MEMORY)
        # Resume
        self._simulate_starting_trial(trial)
        # Error
        trial.set_status(Trial.ERROR)
        # Restore
        self.trial_executor.restore(trial)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testStartFailure(self):
        """Starting a trial whose registered trainable is ``None`` yields ERROR."""
        _global_registry.register(TRAINABLE_CLASS, "asdf", None)
        trial = Trial("asdf", resources=Resources(1, 0))
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.ERROR, trial.status)

    def testPauseResume2(self):
        """Tests that pausing works for trials being processed."""
        trial = Trial("__fake")
        self._simulate_starting_trial(trial)
        self._simulate_getting_result(trial)
        self.trial_executor.pause_trial(trial)
        self.assertEqual(Trial.PAUSED, trial.status)
        self._simulate_starting_trial(trial)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def _testPauseAndStart(self, result_buffer_length):
        """Tests that unpausing works for trials being processed.

        Args:
            result_buffer_length: Value exported as
                ``TUNE_RESULT_BUFFER_LENGTH`` before a fresh executor is
                built; the expected training iteration after each result
                fetch is a multiple of ``max(result_buffer_length, 1)``.
        """
        # Scoped to this test: both variables are restored on cleanup so
        # later tests do not inherit the buffer settings.
        self._set_env("TUNE_RESULT_BUFFER_LENGTH", f"{result_buffer_length}")
        self._set_env("TUNE_RESULT_BUFFER_MIN_TIME_S", "1")

        # Need a new trial executor so the ENV vars are parsed again
        self.trial_executor = RayTrialExecutor()

        base = max(result_buffer_length, 1)

        trial = Trial("__fake")
        self._simulate_starting_trial(trial)

        self._simulate_getting_result(trial)
        self.assertEqual(trial.last_result.get(TRAINING_ITERATION), base)

        self.trial_executor.pause_trial(trial)
        self.assertEqual(Trial.PAUSED, trial.status)

        self._simulate_starting_trial(trial)

        self._simulate_getting_result(trial)
        self.assertEqual(trial.last_result.get(TRAINING_ITERATION), base * 2)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testPauseAndStartNoBuffer(self):
        self._testPauseAndStart(0)

    def testPauseAndStartTrivialBuffer(self):
        self._testPauseAndStart(1)

    def testPauseAndStartActualBuffer(self):
        self._testPauseAndStart(8)

    def testNoResetTrial(self):
        """Tests that reset handles NotImplemented properly."""
        trial = Trial("__fake")
        self._simulate_starting_trial(trial)
        exists = self.trial_executor.reset_trial(trial, {}, "modified_mock")
        self.assertEqual(exists, False)
        self.assertEqual(Trial.RUNNING, trial.status)

    def testResetTrial(self):
        """Tests that reset works as expected."""

        class B(Trainable):
            def step(self):
                return dict(timesteps_this_iter=1, done=True)

            def reset_config(self, config):
                self.config = config
                return True

        trials = self.generate_trials(
            {
                "run": B,
                "config": {"foo": 0},
            },
            "grid_search",
        )
        trial = trials[0]
        self._simulate_starting_trial(trial)
        exists = self.trial_executor.reset_trial(trial, {"hi": 1},
                                                 "modified_mock")
        self.assertEqual(exists, True)
        self.assertEqual(trial.config.get("hi"), 1)
        self.assertEqual(trial.experiment_tag, "modified_mock")
        self.assertEqual(Trial.RUNNING, trial.status)

    def testTrialCleanup(self):
        """Compare default cleanup duration with forced cleanup duration."""

        class B(Trainable):
            def step(self):
                print("Step start")
                time.sleep(4)
                print("Step done")
                return dict(my_metric=1, timesteps_this_iter=1, done=True)

            def reset_config(self, config):
                self.config = config
                return True

            def cleanup(self):
                print("Cleanup start")
                time.sleep(4)
                print("Cleanup done")

        # First check if the trials terminate gracefully by default
        trials = self.generate_trials(
            {
                "run": B,
                "config": {"foo": 0},
            },
            "grid_search",
        )
        trial = trials[0]
        self._simulate_starting_trial(trial)
        time.sleep(1)
        print("Stop trial")
        self.trial_executor.stop_trial(trial)
        print("Start trial cleanup")
        # Monotonic clock: elapsed time must not be affected by wall-clock
        # adjustments.
        start = time.monotonic()
        self.trial_executor.cleanup([trial])
        # 4 - 1 + 4.
        self.assertGreaterEqual(time.monotonic() - start, 6)

        # Check forceful termination. It should run for much less than the
        # sleep periods in the Trainable
        trials = self.generate_trials(
            {
                "run": B,
                "config": {"foo": 0},
            },
            "grid_search",
        )
        trial = trials[0]
        # The variable only needs to be "1" while the executor is built; the
        # value present before this test is restored on cleanup.
        self._set_env("TUNE_FORCE_TRIAL_CLEANUP_S", "1")
        self.trial_executor = RayTrialExecutor()
        os.environ["TUNE_FORCE_TRIAL_CLEANUP_S"] = "0"
        self._simulate_starting_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        time.sleep(1)
        print("Stop trial")
        self.trial_executor.stop_trial(trial)
        print("Start trial cleanup")
        start = time.monotonic()
        self.trial_executor.cleanup([trial])
        # less than 1 with some margin.
        self.assertLess(time.monotonic() - start, 2.0)

        # also check if auto-filled metrics were returned
        self.assertIn(PID, trial.last_result)
        self.assertIn(TRIAL_ID, trial.last_result)
        self.assertNotIn("my_metric", trial.last_result)

    @staticmethod
    def generate_trials(spec, name):
        """Return every trial a ``BasicVariantGenerator`` yields for ``{name: spec}``.

        Collection stops when the generator reports it is finished or when
        ``next_trial()`` returns a falsy value.
        """
        suggester = BasicVariantGenerator()
        suggester.add_configurations({name: spec})
        trials = []
        while not suggester.is_finished():
            trial = suggester.next_trial()
            if not trial:
                break
            trials.append(trial)
        return trials

    def process_trial_save(self, trial, checkpoint_value):
        """Simulates trial runner save.

        Stores ``checkpoint_value`` on ``trial.saving_to`` and passes that
        checkpoint to ``trial.on_checkpoint``.
        """
        checkpoint = trial.saving_to
        checkpoint.value = checkpoint_value
        trial.on_checkpoint(checkpoint)
class RayTrialExecutorTest(unittest.TestCase):
    """Lifecycle tests for ``RayTrialExecutor`` using ``fetch_result``.

    Several tests configure the executor through ``TUNE_*`` environment
    variables; these are set through ``_set_env`` so that whatever was in
    the environment before the test is put back afterwards.

    NOTE(review): several classes named ``RayTrialExecutorTest`` are visible
    in this file; if they really share one module, only the last definition
    is reachable by name -- confirm which one is intended.
    """

    def setUp(self):
        # Wait up to five seconds for placement groups when starting a trial
        self._set_env("TUNE_PLACEMENT_GROUP_WAIT_S", "5")
        # Block for results even when placement groups are pending
        self._set_env("TUNE_TRIAL_STARTUP_GRACE_PERIOD", "0")
        self._set_env("TUNE_TRIAL_RESULT_WAIT_TIME_S", "99999")

        self.trial_executor = RayTrialExecutor(queue_trials=False)
        ray.init(num_cpus=2, ignore_reinit_error=True)
        _register_all()  # Needed for flaky tests

    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects

    def _set_env(self, name, value):
        """Set ``os.environ[name]`` and restore the prior state after the test.

        Args:
            name: Environment variable to set.
            value: String value to assign.
        """
        previous = os.environ.get(name)

        def _restore():
            if previous is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = previous

        # Cleanups run even when setUp or the test body fails, so the
        # variable never leaks into later tests.
        self.addCleanup(_restore)
        os.environ[name] = value

    def testStartStop(self):
        """Starting one trial leaves exactly one running trial."""
        trial = Trial("__fake")
        self.trial_executor.start_trial(trial)
        running = self.trial_executor.get_running_trials()
        self.assertEqual(1, len(running))
        self.trial_executor.stop_trial(trial)

    def testAsyncSave(self):
        """Tests that saved checkpoint value not immediately set."""
        trial = Trial("__fake")
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        trial.last_result = self.trial_executor.fetch_result(trial)[-1]
        checkpoint = self.trial_executor.save(trial, Checkpoint.PERSISTENT)
        self.assertEqual(checkpoint, trial.saving_to)
        self.assertIsNone(trial.checkpoint.value)
        self.process_trial_save(trial)
        self.assertEqual(checkpoint, trial.checkpoint)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testSaveRestore(self):
        """A saved trial can be restored and then terminated."""
        trial = Trial("__fake")
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        trial.last_result = self.trial_executor.fetch_result(trial)[-1]
        self.trial_executor.save(trial, Checkpoint.PERSISTENT)
        self.process_trial_save(trial)
        self.trial_executor.restore(trial)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testPauseResume(self):
        """Tests that pausing works for trials in flight."""
        trial = Trial("__fake")
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        self.trial_executor.pause_trial(trial)
        self.assertEqual(Trial.PAUSED, trial.status)
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testSavePauseResumeErrorRestore(self):
        """Tests that pause checkpoint does not replace restore checkpoint."""
        trial = Trial("__fake")
        self.trial_executor.start_trial(trial)
        trial.last_result = self.trial_executor.fetch_result(trial)[-1]
        # Save
        checkpoint = self.trial_executor.save(trial, Checkpoint.PERSISTENT)
        self.assertEqual(Trial.RUNNING, trial.status)
        self.assertEqual(checkpoint.storage, Checkpoint.PERSISTENT)
        # Process save result (simulates trial runner)
        self.process_trial_save(trial)
        # Train
        self.trial_executor.continue_training(trial)
        trial.last_result = self.trial_executor.fetch_result(trial)[-1]
        # Pause
        self.trial_executor.pause_trial(trial)
        self.assertEqual(Trial.PAUSED, trial.status)
        self.assertEqual(trial.checkpoint.storage, Checkpoint.MEMORY)
        # Resume
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        # Error
        trial.set_status(Trial.ERROR)
        # Restore
        self.trial_executor.restore(trial)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testStartFailure(self):
        """Starting a trial whose registered trainable is ``None`` yields ERROR."""
        _global_registry.register(TRAINABLE_CLASS, "asdf", None)
        trial = Trial("asdf", resources=Resources(1, 0))
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.ERROR, trial.status)

    def testPauseResume2(self):
        """Tests that pausing works for trials being processed."""
        trial = Trial("__fake")
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        self.trial_executor.fetch_result(trial)
        checkpoint = self.trial_executor.pause_trial(trial)
        self.assertEqual(Trial.PAUSED, trial.status)
        self.trial_executor.start_trial(trial, checkpoint)
        self.assertEqual(Trial.RUNNING, trial.status)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def _testPauseUnpause(self, result_buffer_length):
        """Tests that unpausing works for trials being processed.

        Args:
            result_buffer_length: Value exported as
                ``TUNE_RESULT_BUFFER_LENGTH`` before a fresh executor is
                built; the expected training iteration after each fetch is
                a multiple of ``max(result_buffer_length, 1)``.
        """
        # Scoped to this test: both variables are restored on cleanup so
        # later tests do not inherit the buffer settings.
        self._set_env("TUNE_RESULT_BUFFER_LENGTH", f"{result_buffer_length}")
        self._set_env("TUNE_RESULT_BUFFER_MIN_TIME_S", "1")

        # Need a new trial executor so the ENV vars are parsed again
        self.trial_executor = RayTrialExecutor(queue_trials=False)

        base = max(result_buffer_length, 1)

        trial = Trial("__fake")
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        trial.last_result = self.trial_executor.fetch_result(trial)[-1]
        self.assertEqual(trial.last_result.get(TRAINING_ITERATION), base)
        self.trial_executor.pause_trial(trial)
        self.assertEqual(Trial.PAUSED, trial.status)
        self.trial_executor.unpause_trial(trial)
        self.assertEqual(Trial.PENDING, trial.status)
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        trial.last_result = self.trial_executor.fetch_result(trial)[-1]
        self.assertEqual(trial.last_result.get(TRAINING_ITERATION), base * 2)
        self.trial_executor.stop_trial(trial)
        self.assertEqual(Trial.TERMINATED, trial.status)

    def testPauseUnpauseNoBuffer(self):
        self._testPauseUnpause(0)

    def testPauseUnpauseTrivialBuffer(self):
        self._testPauseUnpause(1)

    def testPauseUnpauseActualBuffer(self):
        self._testPauseUnpause(8)

    def testNoResetTrial(self):
        """Tests that reset handles NotImplemented properly."""
        trial = Trial("__fake")
        self.trial_executor.start_trial(trial)
        exists = self.trial_executor.reset_trial(trial, {}, "modified_mock")
        self.assertEqual(exists, False)
        self.assertEqual(Trial.RUNNING, trial.status)

    def testResetTrial(self):
        """Tests that reset works as expected."""

        class B(Trainable):
            def step(self):
                return dict(timesteps_this_iter=1, done=True)

            def reset_config(self, config):
                self.config = config
                return True

        trials = self.generate_trials({
            "run": B,
            "config": {"foo": 0},
        }, "grid_search")
        trial = trials[0]
        self.trial_executor.start_trial(trial)
        exists = self.trial_executor.reset_trial(trial, {"hi": 1},
                                                 "modified_mock")
        self.assertEqual(exists, True)
        self.assertEqual(trial.config.get("hi"), 1)
        self.assertEqual(trial.experiment_tag, "modified_mock")
        self.assertEqual(Trial.RUNNING, trial.status)

    def testForceTrialCleanup(self):
        """Compare default cleanup duration with forced cleanup duration."""

        class B(Trainable):
            def step(self):
                print("Step start")
                time.sleep(10)
                print("Step done")
                return dict(my_metric=1, timesteps_this_iter=1, done=True)

            def reset_config(self, config):
                self.config = config
                return True

            def cleanup(self):
                print("Cleanup start")
                time.sleep(10)
                print("Cleanup done")

        # First check if the trials terminate gracefully by default
        trials = self.generate_trials({
            "run": B,
            "config": {"foo": 0},
        }, "grid_search")
        trial = trials[0]
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        time.sleep(5)
        print("Stop trial")
        self.trial_executor.stop_trial(trial)
        print("Start trial cleanup")
        # Monotonic clock: elapsed time must not be affected by wall-clock
        # adjustments.
        start = time.monotonic()
        self.trial_executor.cleanup([trial])
        self.assertGreaterEqual(time.monotonic() - start, 12.0)

        # Check forceful termination. It should run for much less than the
        # sleep periods in the Trainable
        trials = self.generate_trials({
            "run": B,
            "config": {"foo": 0},
        }, "grid_search")
        trial = trials[0]
        # The variable only needs to be "1" while the executor is built; the
        # value present before this test is restored on cleanup.
        self._set_env("TUNE_FORCE_TRIAL_CLEANUP_S", "1")
        self.trial_executor = RayTrialExecutor(queue_trials=False)
        os.environ["TUNE_FORCE_TRIAL_CLEANUP_S"] = "0"
        self.trial_executor.start_trial(trial)
        self.assertEqual(Trial.RUNNING, trial.status)
        time.sleep(5)
        print("Stop trial")
        self.trial_executor.stop_trial(trial)
        print("Start trial cleanup")
        start = time.monotonic()
        self.trial_executor.cleanup([trial])
        self.assertLess(time.monotonic() - start, 5.0)

        # also check if auto-filled metrics were returned
        self.assertIn(PID, trial.last_result)
        self.assertIn(TRIAL_ID, trial.last_result)
        self.assertNotIn("my_metric", trial.last_result)

    @staticmethod
    def generate_trials(spec, name):
        """Return every trial a ``BasicVariantGenerator`` yields for ``{name: spec}``.

        Collection stops when the generator reports it is finished or when
        ``next_trial()`` returns a falsy value.
        """
        suggester = BasicVariantGenerator()
        suggester.add_configurations({name: spec})
        trials = []
        while not suggester.is_finished():
            trial = suggester.next_trial()
            if not trial:
                break
            trials.append(trial)
        return trials

    def process_trial_save(self, trial):
        """Simulates trial runner save.

        Uses the last element of ``fetch_result(trial)`` as the value of
        ``trial.saving_to`` and passes that checkpoint to
        ``trial.on_checkpoint``.
        """
        checkpoint = trial.saving_to
        checkpoint_value = self.trial_executor.fetch_result(trial)[-1]
        checkpoint.value = checkpoint_value
        trial.on_checkpoint(checkpoint)
class RayTrialExecutorTest(unittest.TestCase):
    """Start, save/restore, pause and reset checks for ``RayTrialExecutor``.

    NOTE(review): several classes named ``RayTrialExecutorTest`` are visible
    in this file; if they really share one module, only the last definition
    is reachable by name -- confirm which one is intended.
    """

    def setUp(self):
        self.trial_executor = RayTrialExecutor(queue_trials=False)
        ray.init()
        _register_all()  # Needed for flaky tests

    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects

    def testStartStop(self):
        """Starting one trial leaves exactly one running trial."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(1, len(executor.get_running_trials()))
        executor.stop_trial(fake_trial)

    def testSaveRestore(self):
        """A running trial can be saved to disk, restored and then stopped."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.save(fake_trial, Checkpoint.DISK)
        executor.restore(fake_trial)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testPauseResume(self):
        """Tests that pausing works for trials in flight."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.pause_trial(fake_trial)
        self.assertEqual(Trial.PAUSED, fake_trial.status)
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testStartFailure(self):
        """Starting a trial whose registered trainable is ``None`` yields ERROR."""
        _global_registry.register(TRAINABLE_CLASS, "asdf", None)
        broken_trial = Trial("asdf", resources=Resources(1, 0))
        self.trial_executor.start_trial(broken_trial)
        self.assertEqual(Trial.ERROR, broken_trial.status)

    def testPauseResume2(self):
        """Tests that pausing works for trials being processed."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        # The fetched result itself is discarded here.
        executor.fetch_result(fake_trial)
        executor.pause_trial(fake_trial)
        self.assertEqual(Trial.PAUSED, fake_trial.status)
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testNoResetTrial(self):
        """Tests that reset handles NotImplemented properly."""
        fake_trial = Trial("__fake")
        self.trial_executor.start_trial(fake_trial)
        was_reset = self.trial_executor.reset_trial(
            fake_trial, {}, "modified_mock")
        self.assertEqual(was_reset, False)
        self.assertEqual(Trial.RUNNING, fake_trial.status)

    def testResetTrial(self):
        """Tests that reset works as expected."""

        class B(Trainable):
            def _train(self):
                return {"timesteps_this_iter": 1, "done": True}

            def reset_config(self, config):
                self.config = config
                return True

        experiment_spec = {"run": B, "config": {"foo": 0}}
        first_trial = self.generate_trials(experiment_spec, "grid_search")[0]
        self.trial_executor.start_trial(first_trial)
        was_reset = self.trial_executor.reset_trial(
            first_trial, {"hi": 1}, "modified_mock")
        self.assertEqual(was_reset, True)
        self.assertEqual(first_trial.config.get("hi"), 1)
        self.assertEqual(first_trial.experiment_tag, "modified_mock")
        self.assertEqual(Trial.RUNNING, first_trial.status)

    def generate_trials(self, spec, name):
        """Return ``next_trials()`` after adding ``{name: spec}`` to a new generator."""
        generator = BasicVariantGenerator()
        generator.add_configurations({name: spec})
        return generator.next_trials()
class RayTrialExecutorTest(unittest.TestCase):
    """Lifecycle checks for ``RayTrialExecutor``, including async saves.

    NOTE(review): several classes named ``RayTrialExecutorTest`` are visible
    in this file; if they really share one module, only the last definition
    is reachable by name -- confirm which one is intended.
    """

    def setUp(self):
        self.trial_executor = RayTrialExecutor(queue_trials=False)
        ray.init()
        _register_all()  # Needed for flaky tests

    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects

    def testStartStop(self):
        """Starting one trial leaves exactly one running trial."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(1, len(executor.get_running_trials()))
        executor.stop_trial(fake_trial)

    def testAsyncSave(self):
        """Tests that saved checkpoint value not immediately set."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        fake_trial.last_result = executor.fetch_result(fake_trial)
        pending = executor.save(fake_trial, Checkpoint.PERSISTENT)
        self.assertEqual(pending, fake_trial.saving_to)
        self.assertEqual(fake_trial.checkpoint.value, None)
        self.process_trial_save(fake_trial)
        self.assertEqual(pending, fake_trial.checkpoint)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testSaveRestore(self):
        """A saved trial can be restored and then terminated."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        fake_trial.last_result = executor.fetch_result(fake_trial)
        executor.save(fake_trial, Checkpoint.PERSISTENT)
        self.process_trial_save(fake_trial)
        executor.restore(fake_trial)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testPauseResume(self):
        """Tests that pausing works for trials in flight."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.pause_trial(fake_trial)
        self.assertEqual(Trial.PAUSED, fake_trial.status)
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testSavePauseResumeErrorRestore(self):
        """Tests that pause checkpoint does not replace restore checkpoint."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        fake_trial.last_result = executor.fetch_result(fake_trial)

        # Save
        saved = executor.save(fake_trial, Checkpoint.PERSISTENT)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        self.assertEqual(saved.storage, Checkpoint.PERSISTENT)

        # Process save result (simulates trial runner)
        self.process_trial_save(fake_trial)

        # Train
        executor.continue_training(fake_trial)
        fake_trial.last_result = executor.fetch_result(fake_trial)

        # Pause
        executor.pause_trial(fake_trial)
        self.assertEqual(Trial.PAUSED, fake_trial.status)
        self.assertEqual(fake_trial.checkpoint.storage, Checkpoint.MEMORY)

        # Resume
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)

        # Error
        fake_trial.set_status(Trial.ERROR)

        # Restore
        executor.restore(fake_trial)

        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testStartFailure(self):
        """Starting a trial whose registered trainable is ``None`` yields ERROR."""
        _global_registry.register(TRAINABLE_CLASS, "asdf", None)
        broken_trial = Trial("asdf", resources=Resources(1, 0))
        self.trial_executor.start_trial(broken_trial)
        self.assertEqual(Trial.ERROR, broken_trial.status)

    def testPauseResume2(self):
        """Tests that pausing works for trials being processed."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        # The fetched result itself is discarded here.
        executor.fetch_result(fake_trial)
        pause_checkpoint = executor.pause_trial(fake_trial)
        self.assertEqual(Trial.PAUSED, fake_trial.status)
        # Restart from whatever pause_trial returned.
        executor.start_trial(fake_trial, pause_checkpoint)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testPauseUnpause(self):
        """Tests that unpausing works for trials being processed."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        fake_trial.last_result = executor.fetch_result(fake_trial)
        self.assertEqual(fake_trial.last_result.get(TRAINING_ITERATION), 1)
        executor.pause_trial(fake_trial)
        self.assertEqual(Trial.PAUSED, fake_trial.status)
        executor.unpause_trial(fake_trial)
        self.assertEqual(Trial.PENDING, fake_trial.status)
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        fake_trial.last_result = executor.fetch_result(fake_trial)
        self.assertEqual(fake_trial.last_result.get(TRAINING_ITERATION), 2)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testNoResetTrial(self):
        """Tests that reset handles NotImplemented properly."""
        fake_trial = Trial("__fake")
        self.trial_executor.start_trial(fake_trial)
        was_reset = self.trial_executor.reset_trial(
            fake_trial, {}, "modified_mock")
        self.assertEqual(was_reset, False)
        self.assertEqual(Trial.RUNNING, fake_trial.status)

    def testResetTrial(self):
        """Tests that reset works as expected."""

        class B(Trainable):
            def step(self):
                return {"timesteps_this_iter": 1, "done": True}

            def reset_config(self, config):
                self.config = config
                return True

        experiment_spec = {"run": B, "config": {"foo": 0}}
        first_trial = self.generate_trials(experiment_spec, "grid_search")[0]
        self.trial_executor.start_trial(first_trial)
        was_reset = self.trial_executor.reset_trial(
            first_trial, {"hi": 1}, "modified_mock")
        self.assertEqual(was_reset, True)
        self.assertEqual(first_trial.config.get("hi"), 1)
        self.assertEqual(first_trial.experiment_tag, "modified_mock")
        self.assertEqual(Trial.RUNNING, first_trial.status)

    @staticmethod
    def generate_trials(spec, name):
        """Return ``next_trials()`` after adding ``{name: spec}`` to a new generator."""
        generator = BasicVariantGenerator()
        generator.add_configurations({name: spec})
        return generator.next_trials()

    def process_trial_save(self, trial):
        """Simulates trial runner save.

        Assigns the outcome of ``fetch_result(trial)`` to the value of
        ``trial.saving_to`` and hands that checkpoint to
        ``trial.on_checkpoint``.
        """
        pending = trial.saving_to
        pending.value = self.trial_executor.fetch_result(trial)
        trial.on_checkpoint(pending)
class RayTrialExecutorTest(unittest.TestCase):
    """Start, save/restore, pause and reset checks for ``RayTrialExecutor``.

    NOTE(review): several classes named ``RayTrialExecutorTest`` are visible
    in this file; if they really share one module, only the last definition
    is reachable by name -- confirm which one is intended.
    """

    def setUp(self):
        # Order matters here: the executor is constructed first, then Ray
        # is started.
        self.trial_executor = RayTrialExecutor(queue_trials=False)
        ray.init()

    def tearDown(self):
        ray.shutdown()
        _register_all()  # re-register the evicted objects

    def testStartStop(self):
        """Starting one trial leaves exactly one running trial."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(1, len(executor.get_running_trials()))
        executor.stop_trial(fake_trial)

    def testSaveRestore(self):
        """A running trial can be saved to disk, restored and then stopped."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.save(fake_trial, Checkpoint.DISK)
        executor.restore(fake_trial)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testPauseResume(self):
        """Tests that pausing works for trials in flight."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.pause_trial(fake_trial)
        self.assertEqual(Trial.PAUSED, fake_trial.status)
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testStartFailure(self):
        """Starting a trial whose registered trainable is ``None`` yields ERROR."""
        _global_registry.register(TRAINABLE_CLASS, "asdf", None)
        broken_trial = Trial("asdf", resources=Resources(1, 0))
        self.trial_executor.start_trial(broken_trial)
        self.assertEqual(Trial.ERROR, broken_trial.status)

    def testPauseResume2(self):
        """Tests that pausing works for trials being processed."""
        executor = self.trial_executor
        fake_trial = Trial("__fake")
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        # The fetched result itself is discarded here.
        executor.fetch_result(fake_trial)
        executor.pause_trial(fake_trial)
        self.assertEqual(Trial.PAUSED, fake_trial.status)
        executor.start_trial(fake_trial)
        self.assertEqual(Trial.RUNNING, fake_trial.status)
        executor.stop_trial(fake_trial)
        self.assertEqual(Trial.TERMINATED, fake_trial.status)

    def testNoResetTrial(self):
        """Tests that reset handles NotImplemented properly."""
        fake_trial = Trial("__fake")
        self.trial_executor.start_trial(fake_trial)
        was_reset = self.trial_executor.reset_trial(
            fake_trial, {}, "modified_mock")
        self.assertEqual(was_reset, False)
        self.assertEqual(Trial.RUNNING, fake_trial.status)

    def testResetTrial(self):
        """Tests that reset works as expected."""

        class B(Trainable):
            def _train(self):
                return {"timesteps_this_iter": 1, "done": True}

            def reset_config(self, config):
                self.config = config
                return True

        experiment_spec = {"run": B, "config": {"foo": 0}}
        first_trial = self.generate_trials(experiment_spec, "grid_search")[0]
        self.trial_executor.start_trial(first_trial)
        was_reset = self.trial_executor.reset_trial(
            first_trial, {"hi": 1}, "modified_mock")
        self.assertEqual(was_reset, True)
        self.assertEqual(first_trial.config.get("hi"), 1)
        self.assertEqual(first_trial.experiment_tag, "modified_mock")
        self.assertEqual(Trial.RUNNING, first_trial.status)

    def generate_trials(self, spec, name):
        """Return ``next_trials()`` after adding ``{name: spec}`` to a new generator."""
        generator = BasicVariantGenerator()
        generator.add_configurations({name: spec})
        return generator.next_trials()