Esempio n. 1
0
def test_warn_cpu():
    """Check that Tune raises ``TuneError`` for the 1-CPU workloads below.

    Two trainables are exercised: ``f`` submits a remote task declared with
    ``num_cpus=1`` and ``g`` creates an actor declared with ``num_cpus=1``.
    Each trainable is run twice -- once with Tune's default resources and
    once with a placement group made of a single ``{"CPU": 1}`` bundle --
    and every run is expected to raise ``TuneError``.
    """

    def f(*a):
        # Trainable: submit one 1-CPU remote task and wait for it.
        @ray.remote(num_cpus=1)
        def task():
            pass

        pending = task.remote()
        ray.get(pending)

    def g(*a):
        # Trainable: start one 1-CPU actor and wait for a method call on it.
        @ray.remote(num_cpus=1)
        class Actor:
            def f(self):
                pass

        handle = Actor.remote()
        ray.get(handle.f.remote())

    # Same order as four explicit stanzas: f (default), f (placement group),
    # g (default), g (placement group).
    for trainable in (f, g):
        with pytest.raises(TuneError):
            tune.run(trainable, verbose=0)

        with pytest.raises(TuneError):
            tune.run(
                trainable,
                resources_per_trial=tune.PlacementGroupFactory([{"CPU": 1}]),
                verbose=0,
            )
Esempio n. 2
0
def test_bad_pg_slots():
    """Expect ``TuneError`` for a 2-CPU task inside two 1-CPU bundles.

    The trainable submits a remote task declared with ``num_cpus=2`` while the
    trial's placement group is built from two ``{"CPU": 1}`` bundles.
    """

    def f(*a):
        # Trainable: submit one remote task that asks for 2 CPUs.
        @ray.remote(num_cpus=2)
        def task():
            pass

        pending = task.remote()
        ray.get(pending)

    bundles = [{"CPU": 1}] * 2
    with pytest.raises(TuneError):
        tune.run(
            f,
            resources_per_trial=tune.PlacementGroupFactory(bundles),
            verbose=0,
        )
Esempio n. 3
0
def test_pg_slots_ok():
    """Run a 1-CPU task and a 1-CPU actor inside two 1-CPU bundles.

    The trainable first waits for a remote task declared with ``num_cpus=1``,
    then creates an actor declared with ``num_cpus=1`` and waits for a method
    call on it.  The trial's placement group is built from two ``{"CPU": 1}``
    bundles, and ``tune.run`` is expected to return without raising.
    """

    def f(*a):
        @ray.remote(num_cpus=1)
        def task():
            pass

        @ray.remote(num_cpus=1)
        class Actor:
            def f(self):
                pass

        # Task first, then the actor -- the order is part of the scenario.
        ray.get(task.remote())
        handle = Actor.remote()
        ray.get(handle.f.remote())

    bundles = [{"CPU": 1}] * 2
    tune.run(
        f,
        resources_per_trial=tune.PlacementGroupFactory(bundles),
        verbose=0,
    )
Esempio n. 4
0
def test_dataset_ok():
    """Exercise Dataset usage from inside Tune trainables.

    * ``f`` calls ``ray.data.range(10).show()`` with default settings and is
      expected to run without raising.
    * ``g`` first sets the current ``DatasetContext``'s scheduling strategy to
      a ``PlacementGroupSchedulingStrategy`` built from the current placement
      group, then runs the same Dataset call.  It is expected to raise
      ``TuneError`` with default trial resources and to succeed when the
      trial is given two ``{"CPU": 1}`` bundles.
    """

    def f(*a):
        dataset = ray.data.range(10)
        dataset.show()

    tune.run(f, verbose=0)

    def g(*a):
        dataset_ctx = DatasetContext.get_current()
        current_pg = ray.util.get_current_placement_group()
        dataset_ctx.scheduling_strategy = PlacementGroupSchedulingStrategy(
            current_pg)
        dataset = ray.data.range(10)
        dataset.show()

    with pytest.raises(TuneError):
        tune.run(g, verbose=0)

    bundles = [{"CPU": 1}] * 2
    tune.run(
        g,
        resources_per_trial=tune.PlacementGroupFactory(bundles),
        verbose=0,
    )
Esempio n. 5
0
def allocation_to_pgf(alloc: List[str], resources_per_node=None):
    """Convert an AdaptDL allocation into a Tune ``PlacementGroupFactory``.

    Args:
        alloc: Non-empty list of node names.  A name may appear several
            times; the number of occurrences scales that node's bundle.
        resources_per_node: Resource dict used as the template for one
            occurrence of a node.  It must contain a ``"CPU"`` entry.  When
            falsy, ``{"CPU": 1.0}`` is used, plus ``"GPU": 1.0`` if
            ``config.default_device()`` returns ``"GPU"``.

    Returns:
        A ``tune.PlacementGroupFactory`` whose first bundle is
        ``{"CPU": 0.001}``, followed by one bundle per distinct node in
        first-seen order.

    Raises:
        AssertionError: If ``alloc`` is empty.
    """
    if not resources_per_node:
        resources_per_node = {"CPU": 1.0}
        if config.default_device() == "GPU":
            resources_per_node["GPU"] = 1.0

    assert len(alloc) > 0

    bundles = [{"CPU": 0.001}]
    for node_name, occurrences in Counter(alloc).items():
        # Work on a private copy so the template is never modified.
        bundle = deepcopy(resources_per_node)
        bundle["CPU"] *= occurrences
        if "GPU" in bundle:
            bundle["GPU"] *= occurrences
        # Nodes whose name contains "adaptdl_virtual" get no node-specific
        # resource entry; every other node gets a small "node:<name>" entry.
        if "adaptdl_virtual" not in node_name:
            bundle[f"node:{node_name}"] = 0.01
        bundles.append(bundle)

    return tune.PlacementGroupFactory(bundles)
Esempio n. 6
0
if not MOCK:
    # __resources_start__
    tune.run(
        train_fn,
        resources_per_trial={"cpu": 2, "gpu": 0.5, "custom_resources": {"hdd": 80}},
    )
    # __resources_end__

    # __resources_pgf_start__
    tune.run(
        train_fn,
        resources_per_trial=tune.PlacementGroupFactory(
            [
                {"CPU": 2, "GPU": 0.5, "hdd": 80},
                {"CPU": 1},
                {"CPU": 1},
            ],
            strategy="PACK",
        ),
    )
    # __resources_pgf_end__

    metric = None

    # __modin_start__
    def train_fn(config, checkpoint_dir=None):
        # some Modin operations here
        # import modin.pandas as pd
        tune.report(metric=metric)

    tune.run(
Esempio n. 7
0
# Create a cluster with 4 CPU slots available.
ray.init(num_cpus=4)

# This will error, since Tune has no resources reserved for Dataset tasks.
# The TuneError is caught and reported so the script can continue.
try:
    tune.run(objective)
except TuneError:
    print("This failed as expected")

# This runs fine, since there are 4 CPUs in the trial's placement group. The first
# CPU slot is used to run the objective function, leaving 3 for Dataset tasks.
# The placement group is requested as four bundles of one CPU each.
tune.run(
    objective,
    resources_per_trial=tune.PlacementGroupFactory([{
        "CPU": 1
    }] * 4),
)
# __resource_allocation_2_end__
# fmt: on

# fmt: off
# __block_move_begin__
import ray
from ray.data.context import DatasetContext

# Fetch the current DatasetContext and set its `optimize_fuse_stages` flag to False.
ctx = DatasetContext.get_current()
ctx.optimize_fuse_stages = False


def map_udf(df):
Esempio n. 8
0
# This Dataset workload will use reserved cluster resources for execution.
def objective(*args):
    """Create ``ray.data.range(10)`` and call ``show()`` on it.

    Any positional arguments are accepted and ignored.
    """
    ray.data.range(10).show()


# Create a cluster with 4 CPU slots available.
ray.init(num_cpus=4)

# This runs smoothly since _max_cpu_fraction_per_node is set to 0.8, effectively
# reserving 1 CPU for Dataset task execution.
# Four samples are launched, each trial requesting a single one-CPU bundle.
# NOTE(review): presumably 0.8 of the 4 CPUs caps placement groups at 3 CPUs,
# which is where the 1 reserved CPU comes from -- confirm against the Ray docs.
tune.run(
    objective,
    num_samples=4,
    resources_per_trial=tune.PlacementGroupFactory(
        [{
            "CPU": 1
        }],
        _max_cpu_fraction_per_node=0.8,
    ),
)
# __resource_allocation_2_end__
# fmt: on

# fmt: off
# __block_move_begin__
import ray
from ray.data.context import DatasetContext

# Fetch the current DatasetContext and set its `optimize_fuse_stages` flag to False.
ctx = DatasetContext.get_current()
ctx.optimize_fuse_stages = False