def test_host_with_several(self):
    # A single host_with entry listing several computations must be
    # visible both from the key ('c1') and from its listed companions.
    hints = DistributionHints(host_with={'c1': ['v1', 'v2']})
    # (expected member, computation whose host_with list is queried)
    # NOTE(review): the last pair repeats the previous one; it may have been
    # meant to query host_with('v2') instead -- confirm before changing.
    expectations = [
        ('c1', 'v1'),
        ('v1', 'c1'),
        ('v2', 'c1'),
        ('v2', 'c1'),
    ]
    for expected, queried in expectations:
        self.assertIn(expected, hints.host_with(queried))
def test_rule_with_model(self):
    must_host = {'a1': ['v1'], 'a3': ['v2']}
    hints = DistributionHints(must_host=must_host,
                              host_with={'m1': ['mf1']})
    agents = []
    for idx in range(1, 11):
        agents.append(AgentDef('a{}'.format(idx), capacity=100))

    agent_mapping = distribute(self.cg, agents, hints,
                               computation_memory=lambda _node: 10)

    # The rule r1 is expected on the agent hosting variable v2 or on the
    # agent hosting the model variable m1.
    rule_agent = agent_mapping.agent_for('r1')
    self.assertTrue(
        rule_agent == agent_mapping.agent_for('v2')
        or agent_mapping.agent_for('m1') == rule_agent)

    self.assertTrue(is_all_hosted(self.cg, agent_mapping))
def test_host_on_highest_dependent_agent(self):
    # Build a small factor graph: f1 depends on v1 and v2, f2 depends on
    # v1, v2 and v3.
    d1 = VariableDomain('d1', '', [1, 2, 3, 5])
    v1 = Variable('v1', d1)
    v2 = Variable('v2', d1)
    v3 = Variable('v3', d1)
    f1 = relation_from_str('f1', 'v1 + v2', [v1, v2])
    f2 = relation_from_str('f2', 'v1 - v2 + v3', [v1, v2, v3])
    cv1 = VariableComputationNode(v1, ['f1', 'f2'])
    cv2 = VariableComputationNode(v2, ['f1', 'f2'])
    cv3 = VariableComputationNode(v3, ['f2'])
    cf1 = FactorComputationNode(f1)
    cf2 = FactorComputationNode(f2)
    cg = ComputationsFactorGraph([cv1, cv2, cv3], [cf1, cf2])

    hints = DistributionHints(must_host={'a1': ['v1'], 'a2': ['v2', 'v3']})

    # we must set the capacity to make sure that a2 cannot take f1
    agents = [AgentDef('a{}'.format(i), capacity=41)
              for i in range(1, 11)]

    agent_mapping = distribute(cg, agents, hints,
                               computation_memory=lambda x: 10)

    # (The debug print of the mapping was removed: tests should not write
    # to stdout.)
    self.assertEqual(agent_mapping.agent_for('f1'), 'a1')
    self.assertEqual(agent_mapping.agent_for('f2'), 'a2')

    self.assertTrue(is_all_hosted(cg, agent_mapping))
def distribute(computation_graph: ComputationGraph,
               agentsdef: Iterable[AgentDef],
               hints: DistributionHints=None,
               computation_memory=None,
               communication_load=None):
    """
    Generate a distribution for the dcop using the linear-program model.

    :param computation_graph: a ComputationGraph
    :param agentsdef: the agents definitions
    :param hints: a DistributionHints; when None, an empty
      DistributionHints is used
    :param computation_memory: a function that takes a computation node as
      an argument and returns the memory footprint for this node
    :param communication_load: a function that takes a Link as an argument
      and returns the communication cost of this edge

    :return: the value returned by factor_graph_lp_model
    :raise ImpossibleDistributionException: if computation_memory or
      communication_load is not given
    """
    if computation_memory is None or communication_load is None:
        # The message names the actual keyword arguments of this function
        # (it previously mentioned a non-existent 'link_communication').
        raise ImpossibleDistributionException(
            'LinearProg distribution requires '
            'computation_memory and communication_load functions')

    # Materialize the iterable so that it can be traversed several times.
    agents = list(agentsdef)
    hints = DistributionHints() if hints is None else hints

    return factor_graph_lp_model(computation_graph, agents, hints,
                                 computation_memory, communication_load)
def _build_dist_hints(loaded, dcop): if "distribution_hints" not in loaded: return None loaded = loaded["distribution_hints"] must_host, host_with = None, None if "must_host" in loaded: for a in loaded["must_host"]: if a not in dcop.agents: raise ValueError("Cannot use must_host with unknown agent " "{}".format(a)) for c in loaded["must_host"][a]: if c not in dcop.variables and c not in dcop.constraints: raise ValueError("Cannot use must_host with unknown " "variable or constraint {}".format(c)) must_host = loaded["must_host"] if "host_with" in loaded: host_with = defaultdict(lambda: set()) for i in loaded["host_with"]: host_with[i].update(loaded["host_with"][i]) for j in loaded["host_with"][i]: s = {i}.union(loaded["host_with"][i]) s.remove(j) host_with[j].update(s) return DistributionHints(must_host, dict(host_with) if host_with is not None else {})
def _build_dist_hints(loaded, dcop): if 'distribution_hints' not in loaded: return None loaded = loaded['distribution_hints'] must_host, host_with = None, None if 'must_host' in loaded: for a in loaded['must_host']: if a not in dcop.agents: raise ValueError('Cannot use must_host with unknown agent ' '{}'.format(a)) for c in loaded['must_host'][a]: if c not in dcop.variables and c not in dcop.constraints: raise ValueError('Cannot use must_host with unknown ' 'variable or constraint {}'.format(c)) must_host = loaded['must_host'] if 'host_with' in loaded: host_with = defaultdict(lambda: set()) for i in loaded['host_with']: host_with[i].update(loaded['host_with'][i]) for j in loaded['host_with'][i]: s = {i}.union(loaded['host_with'][i]) s.remove(j) host_with[j].update(s) return DistributionHints(must_host, dict(host_with) if host_with is not None else {})
def distribute(computation_graph: ComputationGraph,
               agentsdef: Iterable[AgentDef],
               hints: DistributionHints = None,
               computation_memory=None,
               communication_load=None):
    """
    Generate a distribution for the dcop.

    This method uses a simple heuristic for distribution, with no guarantee
    of optimality. Even if a feasible distribution exists, this method is
    not guaranteed to find it.

    When using a dcop that represents a secp, given the correct
    DistributionHints, the same distribution should be generated as with
    the adhoc secp distribution method.

    :param computation_graph: a ComputationGraph
    :param agentsdef: the agents definitions
    :param hints: a DistributionHints; when None, an empty
      DistributionHints is used
    :param computation_memory: a function that takes a computation node as
      an argument and returns its memory footprint (mandatory)
    :param communication_load: forwarded unchanged to _distribute_try

    :return: the value returned by _distribute_try
    :raise ImpossibleDistributionException: if computation_memory is not
      given
    """
    if computation_memory is None:
        raise ImpossibleDistributionException('adhoc distribution requires '
                                              'computation_memory functions')

    # Materialize the iterable so that it can be traversed several times.
    agents = list(agentsdef)
    hints = DistributionHints() if hints is None else hints

    # The last argument used to be `computation_graph`, which ended up in
    # the `communication_load` parameter of _distribute_try.
    return _distribute_try(computation_graph, agents, hints,
                           computation_memory, communication_load)
def test_rule_with_light(self):
    must_host = {'a1': ['v1'], 'a3': ['v2']}
    hints = DistributionHints(must_host=must_host,
                              host_with={'m1': ['mf1']})
    agents = []
    for idx in range(1, 11):
        agents.append(AgentDef('a{}'.format(idx), capacity=100))

    agent_mapping = distribute(self.cg, agents, hints,
                               computation_memory=lambda _node: 10)

    # r2 depends on v3 only, so both are expected on the same agent.
    v3_agent = agent_mapping.agent_for('v3')
    r2_agent = agent_mapping.agent_for('r2')
    self.assertEqual(v3_agent, r2_agent)

    self.assertTrue(is_all_hosted(self.cg, agent_mapping))
def test_respect_must_host_for_var(self):
    # Graph with one variable (v1) linked to one factor (f1).
    factor = relation_from_str('f1', 'v1 * 0.5', [v1])
    var_node = VariableComputationNode(v1, ['f1'])
    factor_node = FactorComputationNode(factor)
    graph = ComputationsFactorGraph([var_node], [factor_node])

    # v1 is pinned on a1 and must be found there in the result.
    pinned = DistributionHints(must_host={'a1': ['v1']})
    agent_mapping = distribute(
        graph, [a1, a2],
        hints=pinned,
        computation_memory=ms.computation_memory,
        communication_load=ms.communication_load)

    self.assertEqual(agent_mapping.agent_for('v1'), 'a1')
def test_model_on_dependent_light(self):
    hints = DistributionHints(must_host={'a1': ['v1'], 'a2': ['v2']},
                              host_with={'m1': ['mf1']})
    agents = [AgentDef('a{}'.format(i), capacity=100)
              for i in range(1, 11)]

    agent_mapping = distribute(self.cg, agents, hints,
                               computation_memory=lambda x: 10)

    # The model variable and its relation must be hosted on one of the
    # agents that were given a must_host computation (a1 or a2) ...
    self.assertIn(agent_mapping.agent_for('m1'), ['a1', 'a2'])
    self.assertIn(agent_mapping.agent_for('mf1'), ['a1', 'a2'])
    # ... and, because of the host_with hint, on the same agent. This
    # equality was announced by the original comment but never asserted.
    self.assertEqual(agent_mapping.agent_for('m1'),
                     agent_mapping.agent_for('mf1'))

    self.assertTrue(is_all_hosted(self.cg, agent_mapping))
def test_must_host_one(self):
    # Graph with one variable (v1) linked to one factor (f1).
    domain = VariableDomain('d1', '', [1, 2, 3, 5])
    var = Variable('v1', domain)
    factor = relation_from_str('f1', 'v1 * 0.5', [var])
    graph = ComputationsFactorGraph(
        [VariableComputationNode(var, ['f1'])],
        [FactorComputationNode(factor)])

    # Hints are given positionally: must_host first, then host_with.
    hints = DistributionHints({'a1': ['v1']}, None)
    agents = [AgentDef(name, capacity=100) for name in ('a1', 'a2')]

    agent_mapping = distribute(graph, agents, hints,
                               computation_memory=lambda _node: 10)

    hosted_on_a1 = agent_mapping.computations_hosted('a1')
    self.assertIn('v1', hosted_on_a1)
    self.assertTrue(is_all_hosted(graph, agent_mapping))
def test_respect_must_host_all_computation_invalid(self):
    factor = relation_from_str('f1', 'v1 * 0.5', [v1])
    var_node = VariableComputationNode(v1, ['f1'])
    factor_node = FactorComputationNode(factor)
    graph = ComputationsFactorGraph([var_node], [factor_node])

    pinned = DistributionHints(must_host={'a1': ['f1', 'v1']})

    # These hints lead to an impossible distribution, as ilp-fgdp requires
    # each agent to host at least one computation. Here both computations
    # are hosted on a1 and there is no computation left for a2.
    with self.assertRaises(ImpossibleDistributionException):
        distribute(graph, [a1, a2],
                   hints=pinned,
                   computation_memory=ms.computation_memory,
                   communication_load=ms.communication_load)
def test_comm_not_enough_place(self):
    # One factor f1 linked to three variables v1, v2 and v3.
    factor = relation_from_str('f1', 'v1 * 0.5 + v2 + v3', [v1, v2, v3])
    var_nodes = [VariableComputationNode(var, ['f1'])
                 for var in (v1, v2, v3)]
    graph = ComputationsFactorGraph(var_nodes,
                                    [FactorComputationNode(factor)])

    pinned = DistributionHints(must_host={'a1': ['v1', 'v2']})
    a1.capacity = 10

    agent_mapping = distribute(
        graph, [a1, a2],
        hints=pinned,
        computation_memory=ms.computation_memory,
        communication_load=ms.communication_load)

    # As there is not enough capacity on a1, factor f1 and variable v3
    # must go on a2.
    for computation in ('f1', 'v3'):
        self.assertEqual(agent_mapping.agent_for(computation), 'a2')
def test_comm(self):
    # One factor f1 linked to three variables v1, v2 and v3.
    factor = relation_from_str('f1', 'v1 * 0.5 + v2 + v3', [v1, v2, v3])
    var_nodes = [VariableComputationNode(var, ['f1'])
                 for var in (v1, v2, v3)]
    graph = ComputationsFactorGraph(var_nodes,
                                    [FactorComputationNode(factor)])

    pinned = DistributionHints(must_host={'a1': ['v1', 'v2']})
    a1.capacity = 1000

    agent_mapping = distribute(
        graph, [a1, a2],
        hints=pinned,
        computation_memory=ms.computation_memory,
        communication_load=ms.communication_load)

    # As there is enough capacity on a1, factor f1 must go there (where
    # most of its variables are already hosted), while v3 must go on a2 to
    # make sure that all agents are used.
    f1_agent = agent_mapping.agent_for('f1')
    self.assertEqual(f1_agent, 'a1')
    v3_agent = agent_mapping.agent_for('v3')
    self.assertEqual(v3_agent, 'a2')
def test_host_with(self):
    # Two variables; only v1 is linked to the factor f1.
    domain = VariableDomain('d1', '', [1, 2, 3, 5])
    var1 = Variable('v1', domain)
    var2 = Variable('v2', domain)
    factor = relation_from_str('f1', 'v1 * 0.5', [var1])
    var_nodes = [VariableComputationNode(var1, ['f1']),
                 VariableComputationNode(var2, [])]
    graph = ComputationsFactorGraph(var_nodes,
                                    [FactorComputationNode(factor)])

    # No must_host hint, only a host_with hint tying v1 and f1 together.
    hints = DistributionHints(None, {'v1': ['f1']})
    agents = []
    for idx in range(1, 11):
        agents.append(AgentDef('a{}'.format(idx), capacity=100))

    agent_mapping = distribute(graph, agents, hints,
                               computation_memory=lambda _node: 10)

    v1_agent = agent_mapping.agent_for('v1')
    f1_agent = agent_mapping.agent_for('f1')
    self.assertEqual(v1_agent, f1_agent)

    self.assertTrue(is_all_hosted(graph, agent_mapping))
def test_must_host_return_empty_when_not_specified(self):
    # Only a1 has a must_host entry: querying a2 must give an empty result.
    hints = DistributionHints(must_host={'a1': ['v1']})
    hosted_on_a2 = hints.must_host('a2')
    self.assertEqual(len(hosted_on_a2), 0)
def test_must_host(self):
    # The computation declared for a1 must be reported for a1.
    hints = DistributionHints(must_host={'a1': ['v1']})
    hosted_on_a1 = hints.must_host('a1')
    self.assertIn('v1', hosted_on_a1)
def _distribute_try(computation_graph: ComputationGraph,
                    agents: Iterable[AgentDef],
                    hints: DistributionHints = None,
                    computation_memory=None,
                    communication_load=None,
                    attempt=0):
    """
    Run one attempt of the heuristic distribution.

    The computations are processed in a random order, in three steps:

    1. computations listed in `hints.must_host` are placed on their agent;
    2. each factor whose single `host_with` hint is a variable computation
       is placed, together with that variable, on an agent that already
       hosts one of the variables the factor depends on (or on a random
       agent if there is none);
    3. every remaining computation is placed on the agent, with enough
       remaining capacity, that hosts the most computations linked to it.

    If a computation cannot be placed in step 3, the whole distribution is
    retried with a new random order.

    :param computation_graph: the graph of computations to distribute
    :param agents: the agents definitions (objects with `name` and
      `capacity`)
    :param hints: a DistributionHints. NOTE: despite the default value, None
      is not supported here; the caller must pass an instance.
    :param computation_memory: a function that takes a computation node and
      returns its memory footprint
    :param communication_load: not used by this heuristic; it is only
      forwarded when retrying
    :param attempt: number of the current attempt, starting at 0

    :return: a Distribution built from the agent -> computations mapping
    :raise ImpossibleDistributionException: if a computation cannot be
      placed and `attempt` is greater than 2
    """
    # `agents` may be any iterable: materialize it, as it is traversed here
    # and passed again to the next attempt in case of failure.
    agents = list(agents)
    agents_capa = {a.name: a.capacity for a in agents}

    # The distribution method depends on the order used to process the
    # nodes: we shuffle them to test a new configuration when retrying a
    # distribution after a failure.
    nodes = list(computation_graph.nodes)
    shuffle(nodes)

    mapping = defaultdict(set)
    # computation name -> name of the agent hosting it (despite its name,
    # this dict holds every hosted computation, not only variables).
    var_hosted = {}

    # Distribute owned computations on the corresponding agent.
    # For a dcop built from a secp, this is the same thing as deploying the
    # light variables on the light devices.
    for a in agents_capa:
        for c in hints.must_host(a):
            mapping[a].add(c)
            var_hosted[c] = a
            agents_capa[a] -= computation_memory(
                computation_graph.computation(c))

    # First mimic the original secp adhoc behavior.
    for n in nodes:
        if n.name in var_hosted:
            continue

        hostwith = hints.host_with(n.name)
        # secp models have a constraint that should be hosted on the same
        # agent as the variable of the model.
        is_model_factor = (
            len(hostwith) == 1
            and n.type == 'FactorComputation'
            and computation_graph.computation(hostwith[0]).type
            == 'VariableComputation')
        if not is_model_factor:
            continue

        dependent_var = [v.name for v in n.factor.dimensions]
        candidates = [a for a in agents_capa
                      if mapping[a].intersection(dependent_var)]
        # Prefer the candidate hosting the fewest computations. The key
        # used to read a stale loop variable (`a`) instead of its own
        # argument, which made this sort a no-op.
        candidates.sort(key=lambda x: len(mapping[x]))

        if candidates:
            selected = candidates[0]
        else:
            selected = choice(list(agents_capa.keys()))

        mapping[selected].update({n.name, hostwith[0]})
        var_hosted[n.name] = selected
        var_hosted[hostwith[0]] = selected
        # NOTE(review): only the footprint of the factor is deducted here,
        # not the footprint of the variable hosted with it, and capacity is
        # not checked when selecting the agent -- confirm this is intended.
        agents_capa[selected] -= computation_memory(n)

    for n in nodes:
        if n.name in var_hosted:
            continue
        footprint = computation_memory(n)

        # Candidates: agents already hosting a computation this one should
        # be hosted with, if they have enough capacity. host_with() returns
        # computation names, so they are translated to agent names first
        # (they used to be looked up directly in agents_capa, which raised
        # a KeyError).
        hinted_agents = {var_hosted[c]
                         for c in hints.host_with(n.name)
                         if c in var_hosted}
        candidates = [(agents_capa[a], a) for a in hinted_agents
                      if agents_capa[a] > footprint]

        # If no hinted agent has enough capacity, fall back to all agents.
        if not candidates:
            candidates = [(c, a) for a, c in agents_capa.items()
                          if c > footprint]

        # Select the candidate that is already hosting the highest number
        # of computations sharing a link with this one.
        links = list(computation_graph.links_for_node(n.name))
        scores = []
        for capacity, a in candidates:
            hosted = mapping[a]
            count = sum(1 for link in links
                        for l_n in link.nodes if l_n in hosted)
            # The tuple is in this order so that candidates are compared by
            # score first, and then by available capacity.
            scores.append((count, capacity, a))

        if not scores:
            # Retry in case of failure: the nodes are shuffled every time,
            # increasing the probability to find a feasible distribution.
            if attempt > 2:
                raise ImpossibleDistributionException(
                    'Could not find feasible distribution after {} '
                    'attempts'.format(attempt))
            # The result of the new attempt replaces this one entirely; it
            # used to be discarded, and execution then fell through with an
            # unbound or stale `selected`.
            return _distribute_try(computation_graph, agents, hints,
                                   computation_memory, communication_load,
                                   attempt + 1)

        selected = max(scores)[2]
        agents_capa[selected] -= footprint
        mapping[selected].add(n.name)
        var_hosted[n.name] = selected

    return Distribution({a: list(mapping[a]) for a in mapping})
def factor_graph_lp_model(cg: ComputationsFactorGraph,
                          agents: List[AgentDef],
                          hints: DistributionHints=None,
                          computation_memory=None,
                          communication_load=None):
    """
    Distribute the computations of a factor graph with a linear program
    that minimizes the communication cost.

    To distribute we need:
    * com : the communication cost of an edge between a var and a fact
    * mem_var : the memory footprint of a variable computation
    * mem_fac : the memory footprint of a factor computation

    These functions depend on the algorithm. Here
    * mem_var and mem_fac are given by the computation_memory function.
    * com is given by the communication_load function.

    :param cg: the factor graph of computations
    :param agents: the agents definitions (objects with `name` and
      `capacity`)
    :param hints: a DistributionHints; its must_host entries are fixed
      before solving. When None, an empty DistributionHints is used.
    :param computation_memory: passed to _computation_memory_in_cg to get
      the footprint of each computation
    :param communication_load: passed to _objective_function to build the
      communication cost
    :return: a Distribution containing the must_host computations and the
      computations placed by the solver
    :raise ImpossibleDistributionException: if the solver does not reach an
      optimal status
    """
    # `hints` defaults to None but is dereferenced below.
    hints = DistributionHints() if hints is None else hints

    variables = [n for n in cg.nodes if n.type == 'VariableComputation']
    factors = [n for n in cg.nodes if n.type == 'FactorComputation']

    agents = list(agents)
    agents_names = [a.name for a in agents]

    fixed_dist = Distribution({a.name: hints.must_host(a.name)
                               for a in agents})

    # Only keep computations for which we actually need to find an agent.
    vars_to_host = [v.name for v in variables
                    if not fixed_dist.has_computation(v.name)]
    facs_to_host = [f.name for f in factors
                    if not fixed_dist.has_computation(f.name)]
    # Sets used for the membership tests of the linearization loop; the
    # lists above are kept to build the constraints in a stable order.
    free_vars = set(vars_to_host)
    free_facs = set(facs_to_host)

    # x_i^k : binary variable indicating if var x_i is hosted on agent a_k.
    xs = _build_xs_binvar(vars_to_host, agents_names)
    # f_j^k : binary variable indicating if factor f_j is hosted on agent
    # a_k.
    fs = _build_fs_binvar(facs_to_host, agents_names)
    # alpha_ijk : binary variable indicating if x_i and f_j are both on
    # a_k.
    alphas = _build_alphaijk_binvars(cg, agents_names)

    # LP problem with objective function (total communication cost).
    pb = LpProblem('distribution', LpMinimize)
    pb += _objective_function(cg, communication_load, alphas,
                              agents_names), 'Communication costs'

    # Constraints.
    # All variable computations must be hosted:
    for i in vars_to_host:
        pb += lpSum([xs[(i, k)] for k in agents_names]) == 1, \
            'var {} is hosted'.format(i)

    # All factor computations must be hosted:
    for j in facs_to_host:
        pb += lpSum([fs[(j, k)] for k in agents_names]) == 1, \
            'factor {} is hosted'.format(j)

    # Each agent must host at least one computation:
    # We only need this constraint for agents that do not already host a
    # computation:
    empty_agents = [a for a in agents_names if not hints.must_host(a)]
    for k in empty_agents:
        pb += lpSum([xs[(i, k)] for i in vars_to_host]) + \
            lpSum([fs[(j, k)] for j in facs_to_host]) >= 1, \
            'atleastone {}'.format(k)

    # Memory capacity constraint for agents
    for a in agents:
        # Decrease capacity for already hosted computations
        capacity = a.capacity - \
            sum([_computation_memory_in_cg(c, cg, computation_memory)
                 for c in hints.must_host(a.name)])

        pb += lpSum([_computation_memory_in_cg(i, cg, computation_memory)
                     * xs[(i, a.name)] for i in vars_to_host]) \
            + lpSum([_computation_memory_in_cg(j, cg, computation_memory)
                     * fs[(j, a.name)] for j in facs_to_host]) \
            <= capacity, \
            'memory {}'.format(a.name)

    # Linearization constraints for alpha_ijk.
    for link in cg.links:
        i, j = link.variable_node, link.factor_node
        var_is_free = i in free_vars
        fac_is_free = j in free_facs
        for k in agents_names:

            if var_is_free and fac_is_free:
                pb += alphas[((i, j), k)] <= xs[(i, k)], \
                    'lin1 {}{}{}'.format(i, j, k)
                pb += alphas[((i, j), k)] <= fs[(j, k)], \
                    'lin2 {}{}{}'.format(i, j, k)
                pb += alphas[((i, j), k)] >= xs[(i, k)] + fs[(j, k)] - 1, \
                    'lin3 {}{}{}'.format(i, j, k)

            elif var_is_free and not fac_is_free:
                # Var is free, factor is already hosted
                if fixed_dist.agent_for(j) == k:
                    pb += alphas[((i, j), k)] == xs[(i, k)]
                else:
                    pb += alphas[((i, j), k)] == 0

            elif not var_is_free and fac_is_free:
                # If i is not in vars_to_host, it means that it is a
                # computation that is already hosted (from hints)
                if fixed_dist.agent_for(i) == k:
                    pb += alphas[((i, j), k)] == fs[(j, k)]
                else:
                    pb += alphas[((i, j), k)] == 0

            else:
                # i and j are both already hosted
                if fixed_dist.agent_for(i) == k and \
                        fixed_dist.agent_for(j) == k:
                    pb += alphas[((i, j), k)] == 1
                else:
                    pb += alphas[((i, j), k)] == 0

    # Now solve our LP with the GLPK command line solver.
    status = pb.solve(GLPK_CMD(keepFiles=0, msg=False,
                               options=['--pcost']))

    if status != LpStatusOptimal:
        raise ImpossibleDistributionException("No possible optimal"
                                              " distribution ")
    logger.debug('GLPK cost : %s', value(pb.objective))

    # The must_host distribution is completed in place with the placement
    # found by the solver.
    comp_dist = fixed_dist
    for k in agents_names:
        agt_vars = [i for i, ka in xs
                    if ka == k and value(xs[(i, ka)]) == 1]
        comp_dist.host_on_agent(k, agt_vars)

        agt_rels = [j for j, ka in fs
                    if ka == k and value(fs[(j, ka)]) == 1]
        comp_dist.host_on_agent(k, agt_rels)
    return comp_dist