Example #1
0
def _distribute_try(computation_graph: ComputationGraph,
                    agents: Iterable[AgentDef],
                    hints: DistributionHints = None,
                    computation_memory=None,
                    communication_load=None,
                    attempt=0):
    """
    Try to build a distribution of the computations with a greedy heuristic.

    The nodes of the graph are shuffled on every call, as the result
    depends on the order in which they are processed. The placement is
    done in three passes:

    1. computations listed in ``hints.must_host`` are placed on their agent,
    2. a factor whose single ``host_with`` hint is a variable computation is
       placed, together with that variable, on the least loaded agent
       already hosting one of the variables in the factor's scope (or on a
       random agent if there is none),
    3. every remaining node is placed on the agent with enough remaining
       capacity that already hosts the most computations linked to it;
       ties are broken in favor of the agent with the most capacity left.

    If a node cannot be placed in pass 3, the whole process is restarted
    (new shuffle) with ``attempt + 1``.

    Note: ``hints`` and ``computation_memory`` default to ``None`` but are
    used unconditionally, callers must provide them.

    :param computation_graph: graph whose nodes must be distributed
    :param agents: agents (with ``name`` and ``capacity``) that can host
        computations
    :param hints: distribution hints (``must_host`` / ``host_with``)
    :param computation_memory: callable returning the memory footprint of a
        computation node
    :param communication_load: not used by this heuristic, only forwarded
        when retrying
    :param attempt: number of attempts already made, used to bound retries
    :return: a ``Distribution`` mapping agent names to the list of hosted
        computation names
    :raises ImpossibleDistributionException: if a node still cannot be
        placed once ``attempt`` is greater than 2
    """
    # `agents` may be a one-shot iterator: materialize it so that it can be
    # reused if the distribution needs to be retried.
    agents = list(agents)
    agents_capa = {a.name: a.capacity for a in agents}
    # The distribution methods depends on the order used to process the node,
    # we shuffle them to test a new configuration when retrying a
    # distribution after a failure.
    nodes = list(computation_graph.nodes)
    shuffle(nodes)
    mapping = defaultdict(set)
    var_hosted = {}

    # Distribute owned computation variable on the corresponding agent.
    # For dcop build from an secp, this is the same thing as deploying the
    # light variable on the light devices, as we were doing before.
    for agt in agents_capa:
        for c in hints.must_host(agt):
            mapping[agt].add(c)
            var_hosted[c] = agt
            agents_capa[agt] -= computation_memory(
                computation_graph.computation(c))

    # First mimic original secp adhoc behavior
    for n in nodes:
        if n.name in var_hosted:
            continue
        hostwith = hints.host_with(n.name)
        # secp models have a constraint that should be hosted on the same
        # agent than the variable of the model
        if len(hostwith) == 1 and n.type == 'FactorComputation' and \
            computation_graph.computation(hostwith[0]).type \
                == 'VariableComputation':

            dependent_var = [v.name for v in n.factor.dimensions]
            candidates = [
                a for a in agents_capa
                if len(set(mapping[a]).intersection(dependent_var)) > 0
            ]

            # Prefer the candidate currently hosting the fewest computations.
            # The key must use the lambda's own argument: using a variable
            # from the enclosing scope makes the key constant and the sort
            # a no-op.
            candidates.sort(key=lambda x: len(mapping[x]))
            if candidates:
                selected = candidates[0]
            else:
                selected = choice(list(agents_capa.keys()))

            mapping[selected].update({n.name, hostwith[0]})
            var_hosted[n.name] = selected
            var_hosted[hostwith[0]] = selected
            # NOTE(review): only the factor's footprint is deducted here,
            # not the footprint of the variable hosted with it - confirm
            # this is intended.
            agents_capa[selected] -= computation_memory(n)

    for n in nodes:
        if n.name in var_hosted:
            continue
        footprint = computation_memory(n)
        # Candidates : hints only with enough capacity.
        # host_with() gives computation names (see above, where they are
        # looked up in the computation graph), while agents_capa is indexed
        # by agent names: translate the hinted computations into the agents
        # currently hosting them.
        hinted_agents = {var_hosted[c] for c in hints.host_with(n.name)
                         if c in var_hosted}
        candidates = [(agents_capa[a], a) for a in hinted_agents
                      if agents_capa[a] > footprint]
        # If no hinted agents has enough capacity, fall back to all agents
        if not candidates:
            candidates = [(c, a) for a, c in agents_capa.items()
                          if c > footprint]

        # Select the candidate that is already hosting the highest
        # number of computations sharing a link with this one.
        scores = []
        for capacity, a in candidates:
            count = 0
            for link in computation_graph.links_for_node(n.name):
                count += sum(1 for l_n in link.nodes if l_n in mapping[a])
            # The tuple is in this order so that we compare by score first,
            # and then by available capacity.
            scores.append((count, capacity, a))

        if not scores:
            # Retry 3 times in case of failure, the nodes will be shuffled
            # every time, increasing the probability to find a feasible
            # distribution.
            if attempt > 2:
                raise ImpossibleDistributionException(
                    'Could not find feasible distribution after {} '
                    'attempts'.format(attempt))
            # The retry builds a complete distribution on its own: return it
            # directly instead of carrying on with this failed attempt.
            return _distribute_try(computation_graph, agents, hints,
                                   computation_memory, communication_load,
                                   attempt + 1)

        selected = max(scores)[2]
        agents_capa[selected] -= footprint
        mapping[selected].add(n.name)
        var_hosted[n.name] = selected

    return Distribution({a: list(mapping[a]) for a in mapping})
Example #2
0
    def test_must_host(self):
        """A computation declared for an agent is returned by must_host."""
        hints = DistributionHints(must_host={'a1': ['v1']})
        hosted_on_a1 = hints.must_host('a1')
        self.assertIn('v1', hosted_on_a1)
Example #3
0
    def test_must_host_return_empty_when_not_specified(self):
        """must_host yields nothing for an agent absent from the hints."""
        hints = DistributionHints(must_host={'a1': ['v1']})
        hosted_on_a2 = hints.must_host('a2')
        self.assertEqual(len(hosted_on_a2), 0)
Example #4
0
def factor_graph_lp_model(cg: ComputationsFactorGraph,
                          agents: List[AgentDef],
                          hints: DistributionHints=None,
                          computation_memory=None,
                          communication_load=None):
    """
    Distribute the computations of a factor graph by solving a linear
    program that minimizes the objective built by `_objective_function`
    (labelled 'Communication costs') under hosting and memory constraints.

    To distribute we need:
    * com : the communication cost of an edge between a var and a fact
    * mem_var : the memory footprint of a variable computation
    * mem_fac : the memory footprint of a factor computation

    These function depends on the algorithm.

    Here
    * mem_var and mem_fac are given by the computation_memory method.
    * com is given by the communication_load method.

    Computations listed in `hints.must_host` are considered already placed:
    no decision variable is created for them and their memory footprint is
    subtracted from the capacity of their agent.

    Note: `hints` defaults to None but is used unconditionally, callers
    must provide it.

    :param cg: the factor graph whose computations must be distributed
    :param agents: the agents (with `name` and `capacity`) available to
        host the computations
    :param hints: distribution hints, only `must_host` is used here
    :param computation_memory: passed to `_computation_memory_in_cg` to get
        the memory footprint of a computation
    :param communication_load: passed to `_objective_function` to build the
        objective
    :return: the Distribution built from the `must_host` hints, extended
        with the placement found by the solver
    :raises ImpossibleDistributionException: if the solver status is not
        LpStatusOptimal
    """
    variables = [n for n in cg.nodes if n.type == 'VariableComputation']
    factors = [n for n in cg.nodes if n.type == 'FactorComputation']

    # agents is iterated several times below: make sure it is a list.
    agents = list(agents)
    agents_names = [a.name for a in agents]

    # Placement imposed by the hints, before any optimization.
    fixed_dist = Distribution({a.name: hints.must_host(a.name)
                               for a in agents})

    # Only keep computations for which we actually need to find an agent.
    vars_to_host = [v.name for v in variables
                    if not fixed_dist.has_computation(v.name)]
    facs_to_host = [f.name for f in factors
                    if not fixed_dist.has_computation(f.name)]

    # x_i^k : binary variable indicating if var x_i is hosted on agent a_k.
    xs = _build_xs_binvar(vars_to_host, agents_names)
    # f_j^k : binary variable indicating if factor f_j is hosted on agent a_k.
    fs = _build_fs_binvar(facs_to_host, agents_names)
    # alpha_ijk : binary variable indicating if  x_i and f_j are both on a_k.
    alphas = _build_alphaijk_binvars(cg, agents_names)

    # LP problem with objective function (total communication cost).
    pb = LpProblem('distribution', LpMinimize)
    pb += _objective_function(cg, communication_load, alphas,
                              agents_names), 'Communication costs'
    # Constraints.
    # All variable computations must be hosted:
    for i in vars_to_host:
        pb += lpSum([xs[(i, k)] for k in agents_names]) == 1, \
              'var {} is hosted'.format(i)

    # All factor computations must be hosted:
    for j in facs_to_host:
        pb += lpSum([fs[(j, k)] for k in agents_names]) == 1, \
              'factor {} is hosted'.format(j)

    # Each agent must host at least one computation:
    # We only need this constraints for agents that do not already host a
    # computation:
    empty_agents = [a for a in agents_names if not hints.must_host(a)]
    for k in empty_agents:
        pb += lpSum([xs[(i, k)] for i in vars_to_host]) + \
              lpSum([fs[(j, k)] for j in facs_to_host]) >= 1, \
              'atleastone {}'.format(k)

    # Memory capacity constraint for agents
    for a in agents:
        # Decrease capacity for already hosted computations
        capacity = a.capacity - \
                   sum([_computation_memory_in_cg(c, cg, computation_memory)
                        for c in hints.must_host(a.name)])

        # Total footprint of the computations assigned to this agent must
        # fit in its remaining capacity.
        pb += lpSum([_computation_memory_in_cg(i, cg, computation_memory) *
                     xs[(i, a.name)] for i in vars_to_host]) \
            + lpSum([_computation_memory_in_cg(j, cg, computation_memory) *
                     fs[(j, a.name)] for j in facs_to_host]) <= capacity, \
            'memory {}'.format(a.name)

    # Linearization constraints for alpha_ijk.
    for link in cg.links:
        i, j = link.variable_node, link.factor_node
        for k in agents_names:

            if i in vars_to_host and j in facs_to_host:
                # Both ends are free: for binary variables, these three
                # constraints force alpha_ijk == (x_i^k AND f_j^k).
                pb += alphas[((i, j), k)] <= xs[(i, k)], \
                    'lin1 {}{}{}'.format(i, j, k)
                pb += alphas[((i, j), k)] <= fs[(j, k)], \
                    'lin2 {}{}{}'.format(i, j, k)
                pb += alphas[((i, j), k)] >= xs[(i, k)] + fs[(j, k)] - 1, \
                    'lin3 {}{}{}'.format(i, j, k)

            elif i in vars_to_host and j not in facs_to_host:
                # Var is free, factor is already hosted
                if fixed_dist.agent_for(j) == k:
                    pb += alphas[((i, j), k)] == xs[(i, k)]
                else:
                    pb += alphas[((i, j), k)] == 0

            elif i not in vars_to_host and j in facs_to_host:
                # if i is not in vars_to_host, it means that it's a
                # computation that is already hosted (from hints)
                if fixed_dist.agent_for(i) == k:
                    pb += alphas[((i, j), k)] == fs[(j, k)]
                else:
                    pb += alphas[((i, j), k)] == 0

            else:
                # i and j are both already hosted
                if fixed_dist.agent_for(i) == k and fixed_dist.agent_for(j) \
                        == k:
                    pb += alphas[((i, j), k)] == 1
                else:
                    pb += alphas[((i, j), k)] == 0

    # Now solve our LP
    # Alternative solver invocations, kept for reference:
    # status = pb.solve(GLPK_CMD())
    # status = pb.solve(GLPK_CMD(mip=1))
    # status = pb.solve(GLPK_CMD(mip=0, keepFiles=1,
    #                                options=['--simplex', '--interior']))
    status = pb.solve(GLPK_CMD(keepFiles=0, msg=False, options=['--pcost']))

    if status != LpStatusOptimal:
        raise ImpossibleDistributionException("No possible optimal"
                                              " distribution ")
    else:
        logger.debug('GLPK cost : %s', value(pb.objective))

        # comp_dist is an alias of fixed_dist, not a copy: the
        # host_on_agent calls below are applied to that same object.
        comp_dist = fixed_dist
        for k in agents_names:

            # Variable computations whose x_i^k is 1 in the solution.
            agt_vars = [i for i, ka in xs
                        if ka == k and value(xs[(i, ka)]) == 1]
            comp_dist.host_on_agent(k, agt_vars)

            # Factor computations whose f_j^k is 1 in the solution.
            agt_rels = [j for j, ka in fs
                        if ka == k and value(fs[(j, ka)]) == 1]
            comp_dist.host_on_agent(k, agt_rels)
        return comp_dist