Example #1
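The listings below rely on cvxpy, numpy, and copy, plus base classes (Policy, PolicyWithPacking, ProportionalPolicy, WaterFillingAlgorithm) that are defined elsewhere in the surrounding scheduler package. A plausible import preamble is sketched here; the module paths for the base classes are assumptions, not taken from the listings.

import copy

import cvxpy as cp
import numpy as np

# Assumed module paths for the shared base classes; adjust to the actual
# package layout.
from policy import Policy, PolicyWithPacking
from proportional import ProportionalPolicy
from water_filling import WaterFillingAlgorithm
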
class MaxMinFairnessPolicyWithPerf(Policy):
    def __init__(self, solver):
        Policy.__init__(self, solver)
        self._name = 'MaxMinFairness_Perf'
        self._proportional_policy = ProportionalPolicy()

    def get_allocation(self, unflattened_throughputs, scale_factors,
                       unflattened_priority_weights, cluster_spec):
        throughputs, index = super().flatten(unflattened_throughputs,
                                             cluster_spec)
        if throughputs is None: return None
        (m, n) = throughputs.shape
        (job_ids, worker_types) = index

        # Row i of scale_factors_array is the scale_factor of job i
        # repeated len(worker_types) times.
        scale_factors_array = self.scale_factors_array(scale_factors, job_ids,
                                                       m, n)

        priority_weights = np.array(
            [1. / unflattened_priority_weights[job_id] for job_id in job_ids])

        proportional_throughputs = self._proportional_policy.get_throughputs(
            throughputs, index, cluster_spec)
        priority_weights = np.multiply(
            priority_weights.reshape((m, 1)),
            1.0 / proportional_throughputs.reshape((m, 1)))

        x = cp.Variable(throughputs.shape)
        # Multiply throughputs by scale_factors to ensure that scale_factor
        # is taken into account while allocating times to different jobs.
        # A job run on 1 GPU should receive `scale_factor` times as much time
        # as a job run on `scale_factor` GPUs if their throughputs are equal.
        objective = cp.Maximize(
            cp.min(
                cp.sum(cp.multiply(
                    np.multiply(throughputs * priority_weights.reshape((m, 1)),
                                scale_factors_array), x),
                       axis=1)))
        # Make sure that the allocation can fit in the cluster.
        constraints = self.get_base_constraints(x, scale_factors_array)
        cvxprob = cp.Problem(objective, constraints)
        result = cvxprob.solve(solver=self._solver)

        if cvxprob.status != "optimal":
            print('WARNING: Allocation returned by policy not optimal!')

        return super().unflatten(x.value.clip(min=0.0).clip(max=1.0), index)
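
The LP above maximizes the minimum priority-weighted, normalized effective throughput across jobs, subject to the cluster-capacity constraints produced by get_base_constraints. Below is a minimal standalone sketch of the same problem with toy numbers, with the capacity constraints written out explicitly (the exact contents of get_base_constraints are assumed here, since they are not shown in the listing):

import cvxpy as cp
import numpy as np

# Toy problem: 2 jobs, 2 worker types.
throughputs = np.array([[40.0, 20.0],
                        [10.0,  5.0]])
scale_factors_array = np.array([[1.0, 1.0],
                                [4.0, 4.0]])   # job 1 uses 4 GPUs
num_workers = np.array([4, 8])                 # GPUs available per worker type
priority_weights = np.array([[1.0], [2.0]])    # already folded with
                                               # 1 / proportional throughput

x = cp.Variable(throughputs.shape)             # time fraction of job i on type j
objective = cp.Maximize(
    cp.min(cp.sum(cp.multiply(
        np.multiply(throughputs * priority_weights, scale_factors_array), x),
        axis=1)))
constraints = [
    x >= 0,                                    # allocations are non-negative
    cp.sum(x, axis=1) <= 1,                    # each job's time fractions sum to <= 1
    # Scaled GPU usage on each worker type must fit in the cluster.
    cp.sum(cp.multiply(scale_factors_array, x), axis=0) <= num_workers,
]
cp.Problem(objective, constraints).solve()
print(x.value.clip(min=0.0, max=1.0))
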
Example #2
class MaxMinFairnessPolicyWithPacking(PolicyWithPacking):
    def __init__(self, solver):
        PolicyWithPacking.__init__(self, solver)
        self._name = 'MaxMinFairness_Packing'
        self._proportional_policy = ProportionalPolicy()

    def get_allocation_using_job_type_throughputs(
            self, unflattened_throughputs, job_id_to_job_type_key,
            scale_factors, unflattened_priority_weights, cluster_spec):
        job_ids = sorted(job_id_to_job_type_key.keys())
        if len(job_ids) == 0:
            return None
        job_type_keys = sorted(unflattened_throughputs.keys())
        worker_types = sorted(cluster_spec.keys())
        num_workers = \
            [cluster_spec[worker_type] for worker_type in worker_types]

        # Create a map from job type to list of job indexes.
        job_type_key_to_job_idx = {}
        for i, job_id in enumerate(job_ids):
            job_type_key = job_id_to_job_type_key[job_id]
            if job_type_key not in job_type_key_to_job_idx:
                job_type_key_to_job_idx[job_type_key] = []
            job_type_key_to_job_idx[job_type_key].append(i)

        # Num jobs.
        n = len(job_ids)
        # Num job_types.
        a = len(unflattened_throughputs.keys())
        # Num worker_types.
        m = len(worker_types)
        # Num variables per job.
        num_vars_per_job = 1 + a

        # Set up scale factors.
        flattened_scale_factors = \
            np.reshape([scale_factors[job_id] for job_id in job_ids], (n, 1))
        scale_factors_array = np.tile(flattened_scale_factors,
                                      (1, num_vars_per_job * m))

        # Set up flattened job type throughputs.
        flattened_throughputs = np.zeros(shape=(a, (1 + a) * m),
                                         dtype=np.float32)
        for i, job_type_key in enumerate(job_type_keys):
            for k, worker_type in enumerate(worker_types):
                for j, other_job_type_key in enumerate([None] + job_type_keys):
                    if j > 0 and other_job_type_key[1] != job_type_key[1]:
                        flattened_throughputs[i, k * (1 + a) + j] = 0.0
                    else:
                        flattened_throughputs[i,k*(1+a)+j] = \
                            unflattened_throughputs[job_type_key][worker_type][other_job_type_key]

        # Set up masks to avoid double-counting allocation values when
        # computing constraint that the sum of allocation values of each
        # worker type must be <= the number of workers of that worker type.
        # TODO: Change this if we ever consider combinations larger than pairs.
        masks = np.full(shape=(n, num_vars_per_job), fill_value=0.5)
        masks[:, 0] = 1.0

        # Allocation matrix.
        x = cp.Variable((n, num_vars_per_job * m))

        constraints = [
            # All allocation values must be >= 0.
            x >= 0,
            # The sum of allocation values for each job must be <= 1.
            cp.sum(x, axis=1) <= 1
        ]

        # The sum of allocation values for each worker type must be <=
        # the number of workers of that type.
        per_worker_type_allocations = []
        for i in range(m):
            relevant_vars = \
                x[:,i*num_vars_per_job:(i+1)*num_vars_per_job]
            relevant_scale_factors = \
                scale_factors_array[:,i*num_vars_per_job:(i+1)*num_vars_per_job]
            per_worker_type_allocations.append(
                cp.sum(
                    cp.multiply(relevant_vars,
                                cp.multiply(relevant_scale_factors, masks))))
        constraints.append(
            cp.hstack(per_worker_type_allocations) <= num_workers)

        # Set the following constraints:
        # for all job type pairs a, b:
        #   sum of allocation of all jobs of type a paired with type b ==
        #   sum of allocation of all jobs of type b paired with type a
        lhs = []
        rhs = []
        for i, job_type_key_0 in enumerate(job_type_keys):
            for j, job_type_key_1 in enumerate(job_type_keys):
                if j <= i:
                    continue
                elif job_type_key_0[1] != job_type_key_1[1]:
                    continue

                # Retrieve the list of jobs of each type.
                job_type_0_jobs = job_type_key_to_job_idx[job_type_key_0]
                job_type_1_jobs = job_type_key_to_job_idx[job_type_key_1]

                for k in range(m):
                    job_type_0_mask = np.zeros(x.shape)
                    job_type_1_mask = np.zeros(x.shape)

                    # Allocation of job_type_0 jobs when paired with job_type_1
                    for job_idx in job_type_0_jobs:
                        offset = k * num_vars_per_job + 1 + j
                        job_type_0_mask[job_idx, offset] = 1

                    # Allocation of job_type_1 jobs when paired with job_type_0
                    for job_idx in job_type_1_jobs:
                        offset = k * num_vars_per_job + 1 + i
                        job_type_1_mask[job_idx, offset] = 1

                    lhs.append(cp.sum(x[job_type_0_mask == 1]))
                    rhs.append(cp.sum(x[job_type_1_mask == 1]))

        assert (len(lhs) == len(rhs))
        if len(lhs) > 0:
            constraints.append(cp.hstack(lhs) == cp.hstack(rhs))

        # Add constraints to make all variables of the form i-A where job i
        # is of job type A equal.
        for i, job_type_key in enumerate(job_type_keys):
            for k in range(m):
                same_job_type_vars = []
                job_type_jobs = job_type_key_to_job_idx[job_type_key]

                # Find all variables for job-job_type pairs where the job
                # types match.
                offset = k * num_vars_per_job + 1 + i
                for job_idx in job_type_jobs:
                    same_job_type_vars.append(x[job_idx, offset])

                # Constrain the variables to all be equal.
                c = cp.Variable()
                constraints.append(cp.hstack(same_job_type_vars) == c)

        throughputs_no_packed_jobs = np.zeros(
            (len(job_ids), len(worker_types)))
        for i, job_id in enumerate(job_ids):
            job_type_key = job_id_to_job_type_key[job_id]
            for j, worker_type in enumerate(worker_types):
                throughputs_no_packed_jobs[i, j] = \
                    unflattened_throughputs[job_type_key][worker_type][None]
        proportional_throughputs = self._proportional_policy.get_throughputs(
            throughputs_no_packed_jobs, (job_ids, worker_types), cluster_spec)

        # Allocation coefficients.
        all_coefficients = np.zeros((n, num_vars_per_job * m))
        for i, job_id in enumerate(job_ids):
            job_type_key = job_id_to_job_type_key[job_id]
            job_type_idx = job_type_keys.index(job_type_key)
            if len(job_type_key_to_job_idx[job_type_key]) == 1:
                for k, worker_type in enumerate(worker_types):
                    offset = k * num_vars_per_job + 1 + job_type_idx
                    constraints.append(x[i, offset] == 0.0)
            proportional_throughput = proportional_throughputs[i]
            all_coefficients[i] = \
                np.multiply(flattened_throughputs[job_type_idx],
                            scale_factors_array[i]) /\
                    (unflattened_priority_weights[job_id] * proportional_throughput)
        objective = \
            cp.Maximize(cp.min(cp.sum(cp.multiply(all_coefficients, x),
                                      axis=1)))

        cvxprob = cp.Problem(objective, constraints)
        result = cvxprob.solve(solver=self._solver)

        if cvxprob.status != "optimal":
            print('WARNING: Allocation returned by policy not optimal!')

        allocation = x.value.clip(min=0.0).clip(max=1.0)

        # Unflatten allocation.
        unflattened_allocation = {}
        for i, job_id in enumerate(job_ids):
            unflattened_allocation[job_id] = {}
            for j, worker_type in enumerate(worker_types):
                unflattened_allocation[job_id][worker_type] = {}
                for k, job_type_key in enumerate([None] + job_type_keys):
                    unflattened_allocation[job_id][worker_type][job_type_key] = \
                        allocation[i, j * num_vars_per_job + k]

        return self.convert_job_type_allocation(unflattened_allocation,
                                                job_id_to_job_type_key)

    def get_allocation(self, unflattened_throughputs, scale_factors,
                       unflattened_priority_weights, cluster_spec):
        all_throughputs, index = \
            self.flatten(d=unflattened_throughputs,
                         cluster_spec=cluster_spec,
                         priority_weights=unflattened_priority_weights)
        if all_throughputs is None or len(all_throughputs) == 0: return None
        (m, n) = all_throughputs[0].shape
        (job_ids, single_job_ids, worker_types, relevant_combinations) = index
        x = cp.Variable((m, n))

        # Row i of scale_factors_array is the scale_factor of job
        # combination i repeated len(worker_types) times.
        scale_factors_array = self.scale_factors_array(scale_factors, job_ids,
                                                       m, n)

        throughputs_no_packed_jobs = np.zeros((len(single_job_ids), n))
        for i, single_job_id in enumerate(single_job_ids):
            for j, worker_type in enumerate(worker_types):
                throughputs_no_packed_jobs[i, j] = \
                    unflattened_throughputs[single_job_id][worker_type]
        proportional_throughputs = self._proportional_policy.get_throughputs(
            throughputs_no_packed_jobs, (single_job_ids, worker_types),
            cluster_spec)

        objective_terms = []
        # Multiply throughputs by scale_factors to ensure that scale_factor
        # is taken into account while allocating times to different jobs.
        # A job run on 1 GPU should receive `scale_factor` times as much time
        # as a job run on `scale_factor` GPUs.
        for i in range(len(all_throughputs)):
            indexes = relevant_combinations[single_job_ids[i]]
            proportional_throughput = proportional_throughputs[i]
            objective_terms.append(
                cp.sum(
                    cp.multiply(
                        np.multiply(all_throughputs[i][indexes],
                                    scale_factors_array[indexes]), x[indexes]))
                / proportional_throughput)
        if len(objective_terms) == 1:
            objective = cp.Maximize(objective_terms[0])
        else:
            objective = cp.Maximize(cp.minimum(*objective_terms))

        # Make sure the allocation can fit in the cluster.
        constraints = self.get_base_constraints(x, single_job_ids,
                                                scale_factors_array,
                                                relevant_combinations)

        # Explicitly constrain all allocation values with an effective scale
        # factor of 0 to be 0.
        # NOTE: This is not strictly necessary because these allocation values
        # do not affect the optimal allocation for nonzero scale factor
        # combinations.
        for i in range(m):
            for j in range(n):
                if scale_factors_array[i, j] == 0:
                    constraints.append(x[i, j] == 0)
        cvxprob = cp.Problem(objective, constraints)
        result = cvxprob.solve(solver=self._solver)

        if cvxprob.status != "optimal":
            print('WARNING: Allocation returned by policy not optimal!')

        return self.unflatten(x.value.clip(min=0.0).clip(max=1.0), index)
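
In get_allocation_using_job_type_throughputs above, each job's row of x has num_vars_per_job = 1 + a columns per worker type: column k * num_vars_per_job is the job running alone on worker type k, and column k * num_vars_per_job + 1 + j is the job packed with job type j (hence the 0.5 entries in masks, which prevent a pair's GPU usage from being counted twice). A toy sketch of that column layout, with hypothetical job-type and worker-type names:

# Column layout used by get_allocation_using_job_type_throughputs (toy sizes).
job_type_keys = ['A', 'B']          # a = 2 job types (hypothetical names)
worker_types = ['k80', 'v100']      # m = 2 worker types (hypothetical names)
a, m = len(job_type_keys), len(worker_types)
num_vars_per_job = 1 + a            # "alone" slot plus one slot per partner type

for k, worker_type in enumerate(worker_types):
    for j, partner in enumerate([None] + job_type_keys):
        # partner None -> the job runs alone; otherwise it is packed with
        # a job of that type.
        print(worker_type, partner, '-> column', k * num_vars_per_job + j)
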

class MaxMinFairnessWaterFillingPolicyWithPerf(Policy, WaterFillingAlgorithm):
    def __init__(self, priority_reweighting_policies=None):
        WaterFillingAlgorithm.__init__(self, priority_reweighting_policies)
        Policy.__init__(self, solver=None)
        self._name = 'MaxMinFairnessWaterFilling_Perf'
        self._proportional_policy = ProportionalPolicy()

    def _get_constraints(self, x, scale_factors_array):
        return self.get_base_constraints(x, scale_factors_array)

    def get_allocation(self,
                       unflattened_throughputs,
                       scale_factors,
                       unflattened_priority_weights,
                       cluster_spec,
                       entity_weights=None,
                       entity_to_job_mapping=None,
                       verbose=False,
                       return_effective_throughputs=False):
        throughputs, index = super().flatten(unflattened_throughputs,
                                             cluster_spec)
        if throughputs is None: return None

        (job_ids, worker_types) = index
        (m, n) = throughputs.shape

        # Row i of scale_factors_array is the scale_factor of job i
        # repeated len(worker_types) times.
        scale_factors_array = self.scale_factors_array(scale_factors, job_ids,
                                                       m, n)
        proportional_throughputs = self._proportional_policy.get_throughputs(
            throughputs, index, cluster_spec)

        self._M = np.max(
            np.multiply(
                throughputs * (1.0 / proportional_throughputs).reshape((m, 1)),
                scale_factors_array))
        self._get_effective_throughputs = lambda x: \
            cp.sum(cp.multiply(throughputs, x), axis=1)

        x = self._run_get_allocation_iterations(
            job_ids,
            m,
            n,
            proportional_throughputs,
            scale_factors_array,
            entity_weights,
            unflattened_priority_weights,
            cluster_spec,
            entity_to_job_mapping=entity_to_job_mapping,
            verbose=verbose)

        priority_weights = np.array(
            [1. / unflattened_priority_weights[job_id] for job_id in job_ids])
        priority_weights = np.multiply(
            priority_weights.reshape((m, 1)),
            1.0 / proportional_throughputs.reshape((m, 1)))
        effective_throughputs = np.sum(np.multiply(throughputs, x), axis=1)
        normalized_effective_throughputs = np.multiply(
            effective_throughputs, 1.0 / proportional_throughputs.reshape(m))

        if verbose:
            print("Normalized effective throughputs:",
                  normalized_effective_throughputs)
            print("Constraints:",
                  np.multiply(x, scale_factors_array).sum(axis=0),
                  x.sum(axis=1))

        if return_effective_throughputs:
            return normalized_effective_throughputs

        self._lp = None
        self._milp = None

        return super().unflatten(x.clip(min=0.0).clip(max=1.0), index)
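
The WaterFillingAlgorithm machinery (_run_get_allocation_iterations, self._M, self._lp, self._milp) is not shown in these listings. Conceptually, water filling raises every unsaturated job's normalized share in lockstep until some job cannot improve further, freezes it, and repeats with the remaining capacity. Below is a tiny single-resource illustration of that idea, using a hypothetical water_fill helper rather than the multi-resource, LP-based implementation used above:

def water_fill(demands, capacity):
    """Classic single-resource max-min (water-filling) allocation sketch."""
    alloc = [0.0] * len(demands)
    active = set(range(len(demands)))
    while active and capacity > 1e-12:
        share = capacity / len(active)
        # Jobs that cannot absorb a full equal share saturate at their demand.
        saturated = {i for i in active if demands[i] - alloc[i] <= share}
        if saturated:
            for i in saturated:
                capacity -= demands[i] - alloc[i]
                alloc[i] = demands[i]
            active -= saturated
        else:
            for i in active:
                alloc[i] += share
            capacity = 0.0
    return alloc

print(water_fill([0.2, 0.5, 1.0], 1.0))   # -> [0.2, 0.4, 0.4]
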

class MaxMinFairnessWaterFillingPolicyWithPacking(PolicyWithPacking,
                                                  WaterFillingAlgorithm):
    def __init__(self, priority_reweighting_policies=None):
        WaterFillingAlgorithm.__init__(self, priority_reweighting_policies)
        PolicyWithPacking.__init__(self, solver=None)
        self._name = 'MaxMinFairnessWaterFilling_Packing'
        self._proportional_policy = ProportionalPolicy()

    def _get_constraints(self, x, scale_factors_array):
        return self.get_base_constraints(x, self._single_job_ids,
                                         scale_factors_array,
                                         self._relevant_combinations)

    def _get_M(self, all_throughputs, index, proportional_throughputs,
               scale_factors_array):
        (_, single_job_ids, _, relevant_combinations) = index
        max_throughputs = []
        for i, single_job_id in enumerate(single_job_ids):
            indexes = relevant_combinations[single_job_id]
            max_throughputs.append(
                np.max(
                    np.multiply(all_throughputs[i][indexes],
                                scale_factors_array[indexes])))
            max_throughputs[-1] /= proportional_throughputs[i]
        return np.max(np.array(max_throughputs))

    def _get_effective_throughputs_helper(self, x, all_throughputs, index):
        (_, single_job_ids, _, relevant_combinations) = index
        effective_throughputs = []
        for i, single_job_id in enumerate(single_job_ids):
            indexes = relevant_combinations[single_job_id]
            effective_throughputs.append(
                cp.sum(cp.multiply(all_throughputs[i][indexes], x[indexes])))
        return cp.hstack(effective_throughputs)

    def get_allocation(self,
                       unflattened_throughputs,
                       scale_factors,
                       unflattened_priority_weights,
                       cluster_spec,
                       entity_weights=None,
                       entity_to_job_mapping=None,
                       verbose=False,
                       return_effective_throughputs=False):
        all_throughputs, index = \
            self.flatten(d=unflattened_throughputs,
                         cluster_spec=cluster_spec)
        if all_throughputs is None or len(all_throughputs) == 0: return None

        (job_ids, single_job_ids, worker_types, relevant_combinations) = index
        self._single_job_ids = single_job_ids
        self._relevant_combinations = relevant_combinations
        (m, n) = all_throughputs[0].shape

        # Row i of scale_factors_array is the scale_factor of job
        # combination i repeated len(worker_types) times.
        scale_factors_array = self.scale_factors_array(scale_factors, job_ids,
                                                       m, n)
        throughputs_no_packed_jobs = np.zeros((len(single_job_ids), n))
        for i, single_job_id in enumerate(single_job_ids):
            for j, worker_type in enumerate(worker_types):
                throughputs_no_packed_jobs[i, j] = \
                    unflattened_throughputs[single_job_id][worker_type]
        proportional_throughputs = self._proportional_policy.get_throughputs(
            throughputs_no_packed_jobs, (single_job_ids, worker_types),
            cluster_spec)

        self._M = self._get_M(all_throughputs, index, proportional_throughputs,
                              scale_factors_array)
        self._get_effective_throughputs = lambda x: \
            self._get_effective_throughputs_helper(x, all_throughputs, index)

        x = self._run_get_allocation_iterations(
            single_job_ids,
            m,
            n,
            proportional_throughputs,
            scale_factors_array,
            entity_weights,
            unflattened_priority_weights,
            cluster_spec,
            entity_to_job_mapping=entity_to_job_mapping,
            verbose=verbose)

        priority_weights = np.array([
            1. / unflattened_priority_weights[single_job_id]
            for single_job_id in single_job_ids
        ])
        priority_weights = np.multiply(
            priority_weights.reshape((len(single_job_ids), 1)),
            1.0 / proportional_throughputs.reshape((len(single_job_ids), 1)))
        effective_throughputs = np.zeros(len(single_job_ids))
        for i, single_job_id in enumerate(single_job_ids):
            indexes = relevant_combinations[single_job_id]
            effective_throughputs[i] = np.sum(
                np.multiply(all_throughputs[i][indexes], x[indexes]))
        normalized_effective_throughputs = np.multiply(
            effective_throughputs,
            1.0 / proportional_throughputs.reshape(len(single_job_ids)))

        if verbose:
            print("Normalized effective throughputs:",
                  normalized_effective_throughputs)
            print("Constraints:",
                  np.multiply(x, scale_factors_array).sum(axis=0),
                  x.sum(axis=1))

        if return_effective_throughputs:
            return normalized_effective_throughputs

        self._lp = None
        self._milp = None

        return super().unflatten(x.clip(min=0.0).clip(max=1.0), index)

class MaxMinFairnessStrategyProofPolicyWithPerf(Policy):
    def __init__(self, solver):
        Policy.__init__(self, solver)
        self._name = 'MaxMinFairness_Perf'
        self._proportional_policy = ProportionalPolicy()

    def get_allocation(self,
                       unflattened_throughputs,
                       scale_factors,
                       unflattened_priority_weights,
                       cluster_spec,
                       recurse_deeper=True):
        throughputs, index = super().flatten(unflattened_throughputs,
                                             cluster_spec)
        if throughputs is None: return None
        (m, n) = throughputs.shape
        (job_ids, worker_types) = index

        if recurse_deeper:
            all_throughputs_minus_job = []
            for job_id in job_ids:
                unflattened_throughputs_minus_job = copy.copy(
                    unflattened_throughputs)
                del unflattened_throughputs_minus_job[job_id]
                throughputs_minus_job = self.get_allocation(
                    unflattened_throughputs_minus_job,
                    scale_factors,
                    unflattened_priority_weights,
                    cluster_spec,
                    recurse_deeper=False)
                all_throughputs_minus_job.append(throughputs_minus_job)

        # Row i of scale_factors_array is the scale_factor of job i
        # repeated len(worker_types) times.
        scale_factors_array = self.scale_factors_array(scale_factors, job_ids,
                                                       m, n)

        priority_weights = np.array(
            [1. / unflattened_priority_weights[job_id] for job_id in job_ids])

        proportional_throughputs = self._proportional_policy.get_throughputs(
            throughputs, index, cluster_spec)
        priority_weights = np.multiply(
            priority_weights.reshape((m, 1)),
            1.0 / proportional_throughputs.reshape((m, 1)))

        x = cp.Variable(throughputs.shape)
        # Multiply throughputs by scale_factors to ensure that scale_factor
        # is taken into account while allocating times to different jobs.
        # A job run on 1 GPU should receive `scale_factor` times as much time
        # as a job run on `scale_factor` GPUs if their throughputs are equal.
        objective = cp.Maximize(
            cp.geo_mean(
                cp.sum(cp.multiply(
                    np.multiply(throughputs * priority_weights.reshape((m, 1)),
                                scale_factors_array), x),
                       axis=1)))
        # Make sure that the allocation can fit in the cluster.
        constraints = self.get_base_constraints(x, scale_factors_array)
        cvxprob = cp.Problem(objective, constraints)
        result = cvxprob.solve(solver=self._solver)

        if cvxprob.status != "optimal":
            print('WARNING: Allocation returned by policy not optimal!')

        throughputs = np.sum(np.multiply(throughputs, x.value), axis=1)
        throughputs_dict = {
            job_ids[i]: throughputs[i]
            for i in range(len(job_ids))
        }
        if not recurse_deeper:
            return throughputs_dict

        discount_factors = np.zeros(len(job_ids))
        for i, job_id in enumerate(job_ids):
            discount_factor = 1.0
            for other_job_id in all_throughputs_minus_job[i]:
                discount_factor *= (throughputs_dict[other_job_id] /
                                    all_throughputs_minus_job[i][other_job_id])
            discount_factors[i] = discount_factor

        discounted_allocation = np.multiply(x.value.T, discount_factors).T

        return super().unflatten(discounted_allocation.clip(min=0.0).clip(max=1.0), index), \
            discount_factors
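
Unlike the cp.min objectives above, this strategy-proof variant maximizes cp.geo_mean of the weighted effective throughputs (i.e., their product, a proportionally fair objective) and then discounts each job's allocation by how much its presence degrades the leave-one-out throughputs computed with recurse_deeper=False. A minimal, self-contained illustration of the geo_mean objective under a toy budget constraint (not the policy's actual constraint set):

import cvxpy as cp

y = cp.Variable(3, nonneg=True)
# Maximizing the geometric mean is equivalent to maximizing sum(log(y)):
# under a single shared budget, the optimum splits the budget equally.
problem = cp.Problem(cp.Maximize(cp.geo_mean(y)), [cp.sum(y) <= 1])
problem.solve()
print(y.value)   # approximately [1/3, 1/3, 1/3]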