Ejemplo n.º 1
0
def test_optimize_local_bounds(perf_params, grad_params):
    """Exercise ``GoodputFunction.optimize`` with only ``atomic_bsz_range``.

    ``perf_params`` and ``grad_params`` are passed in by the caller
    (presumably pytest fixtures -- confirm against the test module).
    The initial batch size is 128 and the atomic batch size range is
    ``(64, 256)``; ``max_batch_size`` is left at its default.
    """
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    bounds = (64, 256)

    # Scalar arguments: one node, one replica.
    goodput, bsz, steps = goodput_fn.optimize(1, 1, atomic_bsz_range=bounds)
    assert bsz == 128, "expected bsz = 128, got {}".format(bsz)
    assert isinstance(goodput, float)

    # Array arguments: first with all replicas on a single node, then with
    # one node per replica. Both layouts are held to the same invariants.
    replicas = np.arange(1, 100)
    min_bsz = np.ceil(128 / replicas).astype(int)
    for nodes in (np.ones_like(replicas), replicas):
        goodput, bsz, steps = goodput_fn.optimize(nodes,
                                                  replicas,
                                                  atomic_bsz_range=bounds)
        assert np.all(bsz >= min_bsz)
        # The lower atomic bound may only be missed where goodput is zero.
        assert np.all((bsz >= 64) | (goodput == 0.0))
        assert np.all(bsz <= 256)
        assert np.all(bsz * replicas <= 100 * 128)
        assert bsz[0] == 128
        assert np.all(steps == 0)
Ejemplo n.º 2
0
def test_evaluate(perf_params, grad_params):
    """Check invariants of goodput, throughput and efficiency on a grid.

    A ``GoodputFunction`` with initial batch size 16 is evaluated on the
    Cartesian product of several node counts, replica counts, atomic batch
    sizes and accumulation step counts (after filtering), and the results
    are checked for sign, range, monotonicity and diminishing returns.
    ``perf_params`` and ``grad_params`` are passed in by the caller
    (presumably pytest fixtures -- confirm against the test module).
    """
    init_batch_size = 16
    goodput_fn = GoodputFunction(perf_params, grad_params, init_batch_size)

    # Cartesian product of the candidate values, then one array per argument.
    combos = itertools.product(
        np.array([1, 2, 3, 4]),         # num_nodes
        np.array([1, 2, 4, 8]),         # num_replicas
        np.array([8, 12, 16, 20, 24]),  # atomic_bsz
        np.array([0, 1, 2, 3, 4]),      # accum_steps
    )
    num_nodes, num_replicas, atomic_bsz, accum_steps = (
        np.array(column) for column in zip(*combos))

    # Only keep valid arguments.
    # NOTE(review): this filter multiplies by ``accum_steps`` while the batch
    # size used further down multiplies by ``accum_steps + 1``; as written,
    # every combination with accum_steps == 0 is dropped. Confirm whether
    # that is intended.
    keep = (num_nodes <= num_replicas) & (
        init_batch_size <= num_replicas * atomic_bsz * accum_steps)
    num_nodes, num_replicas, atomic_bsz, accum_steps = (
        values[keep]
        for values in (num_nodes, num_replicas, atomic_bsz, accum_steps))

    # Evaluate the three quantities under test.
    batch_size = num_replicas * atomic_bsz * (accum_steps + 1)
    goodput = goodput_fn(num_nodes, num_replicas, atomic_bsz, accum_steps)
    throughput = goodput_fn.throughput(num_nodes, num_replicas, atomic_bsz,
                                       accum_steps)
    efficiency = goodput_fn.efficiency(batch_size)

    # Basic invariants: non-negative throughput, efficiency within [0, 1],
    # and goodput equal to their product.
    assert np.all(throughput >= 0)
    assert np.all(efficiency >= 0) and np.all(efficiency <= 1)
    assert np.allclose(goodput, throughput * efficiency)

    # Increasing batch size should decrease efficiency.
    by_batch_size = np.argsort(batch_size)
    assert np.all(np.diff(efficiency[by_batch_size]) <= 0)

    # All else equal, increasing atomic_bsz should increase throughput.
    for indices in groupby_indices(num_nodes, num_replicas, accum_steps):
        order = np.argsort(atomic_bsz[indices])
        group_throughput = throughput[indices][order]
        assert np.all(np.diff(group_throughput) >= 0)
        # Increasing throughput should experience diminishing returns:
        # successive slopes dy/dx must not grow (cross-multiplied form).
        if len(indices) > 1:
            dx = np.diff(atomic_bsz[indices][order])
            dy = np.diff(group_throughput)
            assert np.all(dx[:-1] * dy[1:] - dx[1:] * dy[:-1] <= 0)

    # All else equal, scalability is sublinear with respect to num_replicas.
    scalability = throughput / num_replicas
    for indices in groupby_indices(num_nodes, atomic_bsz, accum_steps):
        order = np.argsort(num_replicas[indices])
        assert np.all(np.diff(scalability[indices][order]) <= 0)
Ejemplo n.º 3
0
def test_allocate_job():
    """Check ``PolluxPolicy.allocate_job`` placements on a fixed cluster.

    Three nodes (keys "0", "1", "2") with different GPU/CPU capacities are
    offered to four jobs, one at a time, and the returned list of node keys
    is compared against the expected placement for each job.
    """
    def make_node(gpu, cpu, preemptible):
        return NodeInfo({"gpu": gpu, "cpu": cpu, "pods": 32},
                        preemptible=preemptible)

    nodes = {
        "0": make_node(1, 500, False),
        "1": make_node(2, 2000, False),
        "2": make_node(2, 3000, True),
    }
    goodput_fn = GoodputFunction(
        PerfParams(0.121, 0.00568, 0.0236, 0.00634, 0.0118, 0.00317, 1.14),
        GradParams(sqr=0.00136, var=0.000502),
        128)
    speedup_fn = SpeedupFunction(goodput_fn, max_batch_size=1280,
                                 atomic_bsz_range=(64, 256))
    now = datetime.now()

    # (cpu request, creation delay in minutes, min replicas, max replicas,
    #  expected placement)
    scenarios = [
        (500, 0, 0, 1, ["0"]),
        (1000, 1, 0, 1, ["1"]),
        (1000, 1, 2, 2, ["1", "1"]),
        (2000, 1, 2, 2, []),
    ]
    jobs = [
        JobInfo({"gpu": 1, "cpu": cpu, "pods": 1}, speedup_fn,
                now + timedelta(minutes=delay), min_replicas,
                max_replicas=max_replicas)
        for cpu, delay, min_replicas, max_replicas, _ in scenarios
    ]
    policy = PolluxPolicy()

    for job, (*_, expected) in zip(jobs, scenarios):
        assert policy.allocate_job(job, nodes) == expected
Ejemplo n.º 4
0
def test_unusable_node():
    """Check ``PolluxPolicy.optimize`` when one node is too small to use.

    Node 0 offers only 500 cpu while every job requests 1000, so just two
    of the three single-GPU nodes can host a job. The test expects more
    than three nodes to be requested and exactly two single-replica
    allocations in total.
    """
    def single_gpu_node(cpu, preemptible):
        return NodeInfo({"gpu": 1, "cpu": cpu, "pods": 32},
                        preemptible=preemptible)

    nodes = {
        0: single_gpu_node(500, False),
        1: single_gpu_node(8000, False),
        2: single_gpu_node(8000, False),
    }
    template = single_gpu_node(8000, True)
    goodput_fn = GoodputFunction(
        PerfParams(0.121, 0.00568, 0.0236, 0.00634, 0.0118, 0.00317, 1.14),
        GradParams(sqr=0.00136, var=0.000502),
        128)
    speedup_fn = SpeedupFunction(goodput_fn, max_batch_size=1280,
                                 atomic_bsz_range=(64, 256))
    now = datetime.now()
    min_replicas = 0
    # Three identical jobs created one minute apart.
    jobs = {
        idx: JobInfo({"gpu": 1, "cpu": 1000, "pods": 1}, speedup_fn,
                     now + timedelta(minutes=idx), min_replicas,
                     max_replicas=1)
        for idx in range(3)
    }
    policy = PolluxPolicy()
    allocations, desired_nodes = policy.optimize(jobs, nodes, {}, template)

    # More nodes than currently exist should be asked for.
    assert desired_nodes > 3
    replica_counts = [len(alloc) for alloc in allocations.values()]
    # No job was allocated more than 1 replica.
    assert max(replica_counts) == 1
    # Exactly two jobs were allocated.
    assert sum(replica_counts) == 2
Ejemplo n.º 5
0
def test_optimize_accumulation(perf_params, grad_params):
    """Exercise ``GoodputFunction.optimize`` with ``accumulation=True``.

    Uses an initial batch size of 128, ``max_batch_size=1280`` and an
    atomic batch size range of ``(64, 256)``. ``perf_params`` and
    ``grad_params`` are passed in by the caller (presumably pytest
    fixtures -- confirm against the test module).
    """
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    options = dict(max_batch_size=1280,
                   atomic_bsz_range=(64, 256),
                   accumulation=True)

    # Scalar arguments.
    goodput, bsz, steps = goodput_fn.optimize(1, 1, **options)
    assert isinstance(goodput, float)

    replicas = np.arange(1, 20)

    def check_shared_invariants(goodput, bsz, steps):
        # Bounds that both layouts must satisfy; most of them are waived
        # for entries whose goodput is exactly zero.
        zero_goodput = goodput == 0.0
        passes = steps + 1
        assert np.all(
            (bsz >= np.ceil(128 / replicas).astype(int)) | zero_goodput)
        assert np.all((bsz >= 64) | zero_goodput)
        assert np.all(bsz <= 256)
        assert np.all(
            (bsz * replicas * passes < 1280 + replicas * passes)
            | zero_goodput)
        assert np.all(steps <= 15)
        assert np.all(steps >= 0)

    # single-node
    goodput, bsz, steps = goodput_fn.optimize(np.ones_like(replicas),
                                              replicas, **options)
    check_shared_invariants(goodput, bsz, steps)
    # With a single replica, either the batch size stays at 128 or
    # accumulation is used.
    assert np.all((replicas > 1) | (bsz == 128) | (steps > 0))

    # multi-node
    goodput, bsz, steps = goodput_fn.optimize(replicas, replicas, **options)
    check_shared_invariants(goodput, bsz, steps)
    # Wherever accumulation is used, steps * bsz must reach at least 256.
    assert np.all((steps * bsz >= 256) | (steps == 0))
Ejemplo n.º 6
0
def test_optimize_no_bounds(perf_params, grad_params):
    """Exercise ``GoodputFunction.optimize`` with no optional bounds.

    With an initial batch size of 128 and neither ``max_batch_size`` nor
    ``atomic_bsz_range`` given, the test expects the atomic batch size to be
    ``ceil(128 / replicas)`` and no accumulation steps. ``perf_params`` and
    ``grad_params`` are passed in by the caller (presumably pytest
    fixtures -- confirm against the test module).
    """
    fun = GoodputFunction(perf_params, grad_params, 128)

    # Scalar arguments: one node, three replicas.
    goodput, bsz, steps = fun.optimize(1, 3)
    assert bsz == 128 // 3 + 1, "expected bsz = 43, got {}".format(bsz)
    assert isinstance(goodput, float)

    # Array arguments: single-node layout first, then one node per replica.
    replicas = np.asarray([1, 2, 3, 4, 5])
    expected_bsz = np.ceil(128 / replicas).astype(int)
    for nodes in (np.ones_like(replicas), replicas):
        goodput, bsz, steps = fun.optimize(nodes, replicas)
        assert bsz.shape == (5, )
        assert np.all(bsz == expected_bsz)
        assert goodput.shape == (5, )
        assert bsz[0] == 128
        assert np.all(steps == 0)
Ejemplo n.º 7
0
def test_optimize_max_bounds(perf_params, grad_params):
    """Exercise ``GoodputFunction.optimize`` with only ``max_batch_size``.

    Uses an initial batch size of 128 and ``max_batch_size=1280``.
    ``perf_params`` and ``grad_params`` are passed in by the caller
    (presumably pytest fixtures -- confirm against the test module).
    """
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    limit = 1280

    # Scalar arguments: one node, one replica.
    goodput, bsz, steps = goodput_fn.optimize(1, 1, max_batch_size=limit)
    assert bsz == 128, "expected bsz = 128, got {}".format(bsz)
    assert isinstance(goodput, float)

    # Array arguments: single-node layout first, then one node per replica.
    replicas = np.arange(1, 100)
    min_bsz = np.ceil(128 / replicas).astype(int)
    for nodes in (np.ones_like(replicas), replicas):
        goodput, bsz, steps = goodput_fn.optimize(nodes,
                                                  replicas,
                                                  max_batch_size=limit)
        assert np.all(bsz >= min_bsz)
        # The total batch size may overshoot the limit by at most one
        # sample per replica.
        assert np.all(bsz * replicas <= limit + replicas)
        assert bsz[0] == 128
        assert np.all(steps == 0)
Ejemplo n.º 8
0
def test_one_replica_accumulation(perf_params, grad_params):
    """Check accumulation results of ``optimize`` for a single replica.

    ``perf_params`` and ``grad_params`` are passed in by the caller
    (presumably pytest fixtures -- confirm against the test module).
    """
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    replicas = np.asarray([1])
    min_bsz = np.ceil(128 / replicas).astype(int)

    # NOTE(review): the loop variable is only used in the upper-bound
    # assertion below; ``optimize`` is always called with the literal
    # max_batch_size=1280, so each iteration repeats the same optimisation.
    # Confirm whether ``max_batch_size=max_batch_size`` was intended.
    for max_batch_size in np.arange(128, 128 * 20, 128):
        # single-node
        goodput, bsz, steps = goodput_fn.optimize(np.ones_like(replicas),
                                                  replicas,
                                                  max_batch_size=1280,
                                                  atomic_bsz_range=(64, 256),
                                                  accumulation=True)
        zero_goodput = goodput == 0.0
        total_bsz = bsz * (steps + 1)
        assert np.all((bsz >= 64) | zero_goodput)
        assert np.all(bsz <= 256)
        assert np.all((total_bsz <= max_batch_size) | zero_goodput)
        assert np.all((bsz >= min_bsz) | zero_goodput)
        # Accumulation must not be used just to reproduce a total of 128.
        assert np.all((total_bsz != 128) | (steps == 0))
Ejemplo n.º 9
0
def test_optimize(num_nodes, total_devices=16):
    """Run ``PolluxPolicy.optimize`` three times and check capacity limits.

    Args:
        num_nodes: Number of nodes in the simulated cluster. Must divide
            ``total_devices`` evenly.
        total_devices: Total number of GPUs, split equally across nodes.

    After each optimisation round the test asserts that no job received
    more replicas than its ``max_replicas`` and that no node hosts more
    replicas than its "nvidia.com/gpu" or "pods" capacity.
    """
    assert total_devices % num_nodes == 0
    num_devices = total_devices // num_nodes
    print("{}x{} nodes:".format(num_nodes, num_devices))
    # Make up a realistic speedup function.
    perf_params = PerfParams(0.121, 0.00568, 0.0236, 0.00634,
                             0.0118, 0.00317, 1.14)
    grad_params = GradParams(sqr=0.00136, var=0.000502)
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    speedup_fn = SpeedupFunction(goodput_fn, max_batch_size=1280,
                                 atomic_bsz_range=(64, 256))
    now = datetime.now()
    # Add a few jobs, created one minute apart.
    job_resources = {"nvidia.com/gpu": 1, "pods": 1}
    min_replicas = 0
    max_replicas = 8
    jobs = {}
    for i in range(16):
        # Pass the datetime itself. A trailing comma here previously turned
        # the value into a 1-tuple instead of a timestamp.
        creation_timestamp = now + timedelta(minutes=i)
        jobs[i] = JobInfo(job_resources, speedup_fn, creation_timestamp,
                          min_replicas, max_replicas)
    # Add a few nodes.
    node_resources = {"nvidia.com/gpu": num_devices, "pods": 32}
    nodes = {i: NodeInfo(node_resources, preemptible=False)
             for i in range(num_nodes)}
    # Add a node template.
    node_template = NodeInfo(node_resources, preemptible=True)
    policy = PolluxPolicy()
    prev_allocs = {}
    for i in range(3):
        start = time.time()
        allocations, desired_nodes = \
            policy.optimize(jobs, nodes, prev_allocs, node_template)
        duration = time.time() - start
        print("optimize {}x ({}s sec):".format(i + 1, duration))
        # Count replicas per node while checking each job's replica limit.
        node_count = Counter()
        for job_key, placement in allocations.items():
            assert len(placement) <= jobs[job_key].max_replicas
            node_count.update(placement)
        for node_key, count in node_count.items():
            assert count <= nodes[node_key].resources["nvidia.com/gpu"]
            assert count <= nodes[node_key].resources["pods"]
Ejemplo n.º 10
0
    def job_info(self) -> JobInfo:
        """Build a ``JobInfo`` from the latest fetched metrics.

        When ``self._fetch_metrics()`` returns metrics, the speedup function
        is a ``SpeedupFunction`` wrapping a ``GoodputFunction`` built from
        them; missing gradient parameters are replaced by
        ``GradParams(0.0, 1.0)``. When no metrics are available, the speedup
        function simply returns its second argument.

        Returns:
            A ``JobInfo`` built from the configured job resources, the
            speedup function, this object's creation timestamp and the
            configured min/max replica counts.
        """
        metrics = self._fetch_metrics()
        if metrics is None:
            # No profile yet: fall back to the identity on the second
            # argument.
            speedup_fn = lambda n, r: r  # noqa: E731
        else:
            perf_params = metrics.perf_params
            grad_params = metrics.grad_params
            if grad_params is None:
                grad_params = GradParams(0.0, 1.0)
            speedup_fn = SpeedupFunction(
                GoodputFunction(perf_params, grad_params,
                                metrics.init_batch_size),
                metrics.max_batch_size,
                metrics.local_bsz_bounds,
                metrics.gradient_accumulation)

        return JobInfo(config.job_resources(), speedup_fn,
                       self.creation_timestamp, config._JOB_MIN_REPLICAS,
                       config._JOB_MAX_REPLICAS)
Ejemplo n.º 11
0
 def _get_job_info(self, job):
     """Translate a job mapping into a ``JobInfo``.

     The pod spec under ``job["spec"]["template"]["spec"]`` is passed
     through ``set_default_resources`` and written back into ``job``.
     Replica limits come from the job spec and from the training hints
     under ``job["status"]["train"]``. A goodput-based speedup function is
     only built for preemptible jobs whose hints contain both "perfParams"
     and "initBatchSize"; every other job gets a speedup function that
     returns its second argument.

     Args:
         job: Nested mapping with at least "spec" and "metadata" entries.

     Returns:
         ``JobInfo`` built from the pod resource requests, the speedup
         function, the parsed creation timestamp, the replica limits and
         the preemptible flag.
     """
     spec = job["spec"]
     pod_spec = set_default_resources(spec["template"]["spec"])
     spec["template"]["spec"] = pod_spec
     resources = get_pod_requests(pod_spec)
     hints = job.get("status", {}).get("train", {})

     # Start from twice the profiled replica count (at least 1), cap it by
     # the spec's maxReplicas when set, and never go below minReplicas.
     max_replicas = max(2 * hints.get("maxProfiledReplicas", 0), 1)
     spec_max_replicas = spec.get("maxReplicas")
     if spec_max_replicas:
         max_replicas = min(max_replicas, spec_max_replicas)
     min_replicas = spec.get("minReplicas", 0)
     max_replicas = max(max_replicas, min_replicas)

     preemptible = spec.get("preemptible", True)
     profiled = "perfParams" in hints and "initBatchSize" in hints
     if not (profiled and preemptible):
         speedup_fn = lambda n, r: r  # noqa: E731
     else:
         init_batch_size = hints["initBatchSize"]
         max_batch_size = hints.get("maxBatchSize") or init_batch_size
         local_bsz_bounds = hints.get("localBszBounds")
         if local_bsz_bounds:
             min_local_bsz = local_bsz_bounds[0] or 1
             # Make sure max_batch_size / replicas >= min_local_bsz
             if max_batch_size < min_local_bsz * max_replicas:
                 max_replicas = int(max_batch_size / min_local_bsz)
         raw_perf = hints["perfParams"]
         perf_params = PerfParams(
             *(raw_perf[name] for name in PERF_PARAMS.keys()))
         if "gradParams" in hints:
             raw_grad = hints["gradParams"]
             grad_params = GradParams(raw_grad["norm"], raw_grad["var"])
         else:
             grad_params = GradParams(0.0, 1.0)
         goodput_fn = GoodputFunction(perf_params, grad_params,
                                      init_batch_size,
                                      self._metrics_options)
         speedup_fn = SpeedupFunction(
             goodput_fn,
             hints.get("maxBatchSize"),
             local_bsz_bounds,
             hints.get("gradientAccumulation", False))

     creation_ts = dateutil.parser.isoparse(
         job["metadata"]["creationTimestamp"])
     return JobInfo(resources, speedup_fn, creation_ts, min_replicas,
                    max_replicas, preemptible)
Ejemplo n.º 12
0
def get_goodput_fn():
    """Return a ``GoodputFunction`` for the current metrics state.

    Returns ``None`` while either the gradient parameters or the
    performance parameters of the metrics state are still unset.
    """
    state = _metrics_state()
    if state.grad_params is not None and state.perf_params is not None:
        return GoodputFunction(state.perf_params, state.grad_params,
                               state.init_batch_size)
    return None
Ejemplo n.º 13
0
def test_optimize(num_nodes, total_devices=16):
    """Run three allocation cycles with preemptible and pinned jobs.

    Ten jobs are shuffled and split in half: one half preemptible
    (0-8 replicas), the other half non-preemptible (2-4 replicas). After
    each call to ``PolluxPolicy.optimize`` the test checks replica limits,
    node capacity, and that non-preemptible jobs which already had an
    allocation keep exactly that allocation. One randomly chosen job is
    then removed before the next cycle.

    Args:
        num_nodes: Number of nodes in the simulated cluster. Must divide
            ``total_devices`` evenly.
        total_devices: Total number of GPUs, split equally across nodes.
    """
    n_jobs = 10
    job_ids = list(range(n_jobs))
    random.shuffle(job_ids)

    half = len(job_ids) // 2
    preemptible_idxs = job_ids[:half]
    non_preemptible_idxs = job_ids[half:]

    assert total_devices % num_nodes == 0
    num_devices = total_devices // num_nodes
    print(f"{num_nodes}x{num_devices} nodes:")
    # Make up a realistic speedup function.
    perf_params = PerfParams(0.121, 0.00568, 0.0236, 0.00634, 0.0118, 0.00317,
                             1.14)
    grad_params = GradParams(sqr=0.00136, var=0.000502)
    goodput_fn = GoodputFunction(perf_params, grad_params, 128)
    speedup_fn = SpeedupFunction(goodput_fn,
                                 max_batch_size=1280,
                                 atomic_bsz_range=(64, 256))
    now = datetime.now()
    policy = PolluxPolicy()
    job_resources = {"nvidia.com/gpu": 1, "pods": 1}
    # Add a few nodes.
    node_resources = {"nvidia.com/gpu": num_devices, "pods": 32}
    nodes = {
        i: NodeInfo(node_resources, preemptible=False)
        for i in range(num_nodes)
    }
    # Add a node template.
    node_template = NodeInfo(node_resources, preemptible=True)

    # Empty allocations
    prev_allocs = {i: [] for i in job_ids}
    for cycle in range(3):
        # Start allocation cycle: rebuild the job set from the surviving ids.
        # Timestamps are passed as plain datetimes. A trailing comma here
        # previously turned each one into a 1-tuple.
        jobs = {}
        for i in preemptible_idxs:
            jobs[i] = JobInfo(job_resources,
                              speedup_fn,
                              now + timedelta(minutes=i),
                              min_replicas=0,
                              max_replicas=8)
        for i in non_preemptible_idxs:
            jobs[i] = JobInfo(job_resources,
                              speedup_fn,
                              now + timedelta(minutes=i),
                              min_replicas=2,
                              max_replicas=4,
                              preemptible=False)
        start = time.time()
        assert len(jobs) > 0
        allocations, desired_nodes = \
            policy.optimize(jobs, nodes, prev_allocs, node_template)
        duration = time.time() - start
        print(f"optimize {cycle + 1}x ({duration}s sec)")
        node_count = Counter()
        for job_key, placement in allocations.items():
            assert len(placement) <= jobs[job_key].max_replicas
            # A job is either unallocated or has at least min_replicas.
            if placement:
                assert len(placement) >= jobs[job_key].min_replicas
            node_count.update(placement)
        for node_key, count in node_count.items():
            assert count <= nodes[node_key].resources["nvidia.com/gpu"]
            assert count <= nodes[node_key].resources["pods"]

        # Check if we are maintaining allocations for non-preemptible jobs
        for i in non_preemptible_idxs:
            if (i in allocations) and prev_allocs[i]:
                assert allocations[i] == prev_allocs[i]

        prev_allocs = copy.deepcopy(allocations)
        # Remove one random job. The keys are materialised into a list
        # because a dict view is not a sequence, and random.sample() rejects
        # non-sequence populations on Python 3.11+.
        remove = random.choice(list(allocations))
        if remove in non_preemptible_idxs:
            non_preemptible_idxs.remove(remove)
            print(f"Deleting non-preemptible job {remove}")
        else:
            preemptible_idxs.remove(remove)
            print(f"Deleting preemptible job {remove}")
        prev_allocs.pop(remove)