Ejemplo n.º 1
0
def ray_start_cluster():
    """Yield a connected multi-node test cluster, then shut it down.

    A head node is started and connected to, three more nodes are added with
    the same arguments (4 CPUs each plus a JSON-encoded ``_internal_config``),
    and the cluster is yielded once ``wait_for_nodes()`` has returned.

    NOTE(review): the setup / ``yield`` / cleanup shape looks like a pytest
    fixture, but no decorator is visible here -- confirm against the file.

    Yields:
        The ``Cluster`` instance created by this function.
    """
    node_args = {
        "num_cpus": 4,
        "_internal_config": json.dumps({
            "initial_reconstruction_timeout_milliseconds": 1000,
            "num_heartbeats_timeout": 10,
        }),
    }
    # Start with 3 worker nodes and 4 cores each.
    cluster = Cluster(
        initialize_head=True, connect=True, head_node_args=node_args)
    try:
        for _ in range(3):
            cluster.add_node(**node_args)
        cluster.wait_for_nodes()
        yield cluster
    finally:
        # Runs even when adding nodes or waiting for them fails, so an
        # already-started cluster is never left behind.
        ray.shutdown()
        cluster.shutdown()
Ejemplo n.º 2
0
class RayExecutorQueueTest(unittest.TestCase):
    """Exercises ``has_resources`` on an executor built with ``queue_trials=True``.

    Every test gets a fresh cluster whose head node is started with one CPU
    and no explicit GPU count.
    """

    def setUp(self):
        """Create the trial executor and a one-CPU head node, then register."""
        self.trial_executor = RayTrialExecutor(queue_trials=True,
                                               refresh_period=0)
        self.cluster = Cluster(initialize_head=True,
                               connect=True,
                               head_node_args={
                                   "num_cpus":
                                   1,
                                   "_internal_config":
                                   json.dumps({"num_heartbeats_timeout": 10})
                               })
        # Pytest doesn't play nicely with imports
        _register_all()

    def tearDown(self):
        """Disconnect from Ray, stop the cluster and restore registrations."""
        ray.shutdown()
        self.cluster.shutdown()
        _register_all()  # re-register the evicted objects

    @staticmethod
    def _create_trial(cpu, gpu=0):
        """Return a ``__fake`` trial requesting ``cpu`` CPUs and ``gpu`` GPUs."""
        return Trial("__fake", resources=Resources(cpu=cpu, gpu=gpu))

    def testQueueTrial(self):
        """Tests that a GPU trial still reports resources after the CPU is used."""
        cpu_only = self._create_trial(1, 0)
        self.assertTrue(self.trial_executor.has_resources(cpu_only.resources))
        self.trial_executor.start_trial(cpu_only)

        # The head node was started without an explicit GPU count, yet the
        # executor is expected to accept this trial -- presumably because
        # queue_trials=True allows it to be queued (confirm in the executor).
        gpu_only = self._create_trial(0, 1)
        self.assertTrue(self.trial_executor.has_resources(gpu_only.resources))

    def testHeadBlocking(self):
        """Tests has_resources before and after a CPU+GPU node is added."""
        gpu_trial = self._create_trial(1, 1)
        self.assertTrue(self.trial_executor.has_resources(gpu_trial.resources))
        self.trial_executor.start_trial(gpu_trial)

        # TODO(rliaw): This behavior is probably undesirable, but right now
        # trials with different resource requirements is not often used.
        cpu_only_trial = self._create_trial(1, 0)
        self.assertFalse(
            self.trial_executor.has_resources(cpu_only_trial.resources))

        self.cluster.add_node(num_cpus=1, num_gpus=1)
        self.cluster.wait_for_nodes()

        # With the extra node available the CPU-only trial is now accepted.
        self.assertTrue(
            self.trial_executor.has_resources(cpu_only_trial.resources))
        self.trial_executor.start_trial(cpu_only_trial)

        cpu_only_trial2 = self._create_trial(1, 0)
        self.assertTrue(
            self.trial_executor.has_resources(cpu_only_trial2.resources))
        self.trial_executor.start_trial(cpu_only_trial2)

        # A further CPU-only trial is expected to be rejected at this point.
        cpu_only_trial3 = self._create_trial(1, 0)
        self.assertFalse(
            self.trial_executor.has_resources(cpu_only_trial3.resources))