def ray_start_cluster():
    """Yield a connected cluster with a head node and three worker nodes.

    The head node and every worker node are started with the same
    arguments: 4 CPUs and an ``_internal_config`` JSON string that sets
    ``initial_reconstruction_timeout_milliseconds`` to 1000 and
    ``num_heartbeats_timeout`` to 10.

    Yields:
        The ``Cluster`` instance, after ``wait_for_nodes()`` has returned.
    """
    node_args = {
        "num_cpus": 4,
        "_internal_config": json.dumps({
            "initial_reconstruction_timeout_milliseconds": 1000,
            "num_heartbeats_timeout": 10
        })
    }
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=node_args)
    try:
        # Start with 3 worker nodes and 4 cores each.
        for _ in range(3):
            cluster.add_node(**node_args)
        cluster.wait_for_nodes()
        yield cluster
    finally:
        # Runs on normal teardown and also when adding or waiting for the
        # worker nodes fails, so the already-started head node and the Ray
        # connection are not leaked.
        ray.shutdown()
        cluster.shutdown()
class RayExecutorQueueTest(unittest.TestCase):
    """Checks ``RayTrialExecutor.has_resources`` with ``queue_trials=True``.

    Each test runs against a fresh cluster whose head node is started with
    a single CPU and no explicit GPU count.
    """

    def setUp(self):
        """Create the executor, then start and connect to a 1-CPU head node."""
        self.trial_executor = RayTrialExecutor(
            queue_trials=True, refresh_period=0)
        self.cluster = Cluster(
            initialize_head=True,
            connect=True,
            head_node_args={
                "num_cpus": 1,
                "_internal_config": json.dumps({
                    "num_heartbeats_timeout": 10
                })
            })
        # Pytest doesn't play nicely with imports
        _register_all()

    def tearDown(self):
        """Disconnect from Ray and shut the cluster down."""
        ray.shutdown()
        self.cluster.shutdown()
        _register_all()  # re-register the evicted objects

    @staticmethod
    def _create_trial(cpu, gpu=0):
        """Return a ``"__fake"`` trial requesting ``cpu`` CPUs and ``gpu`` GPUs."""
        return Trial("__fake", resources=Resources(cpu=cpu, gpu=gpu))

    def testQueueTrial(self):
        """Tests that a GPU trial is reported as runnable on a CPU-only head.

        NOTE(review): presumably this holds because ``queue_trials=True``
        lets the executor queue a trial the cluster cannot fit yet -- confirm
        against ``RayTrialExecutor.has_resources``.
        """
        cpu_only = self._create_trial(1, 0)
        self.assertTrue(self.trial_executor.has_resources(cpu_only.resources))
        self.trial_executor.start_trial(cpu_only)

        # The head's only CPU is now taken and no GPU was configured, yet the
        # executor is still expected to accept this trial.
        gpu_only = self._create_trial(0, 1)
        self.assertTrue(self.trial_executor.has_resources(gpu_only.resources))

    def testHeadBlocking(self):
        """Tests that a started GPU trial blocks others until a node is added."""
        gpu_trial = self._create_trial(1, 1)
        self.assertTrue(self.trial_executor.has_resources(gpu_trial.resources))
        self.trial_executor.start_trial(gpu_trial)

        # TODO(rliaw): This behavior is probably undesirable, but right now
        #  trials with different resource requirements is not often used.
        cpu_only_trial = self._create_trial(1, 0)
        self.assertFalse(
            self.trial_executor.has_resources(cpu_only_trial.resources))

        # Adding a node with 1 CPU and 1 GPU makes the CPU-only trial
        # schedulable.
        self.cluster.add_node(num_cpus=1, num_gpus=1)
        self.cluster.wait_for_nodes()

        self.assertTrue(
            self.trial_executor.has_resources(cpu_only_trial.resources))
        self.trial_executor.start_trial(cpu_only_trial)

        cpu_only_trial2 = self._create_trial(1, 0)
        self.assertTrue(
            self.trial_executor.has_resources(cpu_only_trial2.resources))
        self.trial_executor.start_trial(cpu_only_trial2)

        # A further CPU-only trial is expected to be rejected.
        cpu_only_trial3 = self._create_trial(1, 0)
        self.assertFalse(
            self.trial_executor.has_resources(cpu_only_trial3.resources))