def solve(self):
    """
    Run the configured solver on the matching problem.

    Returns the solution produced by the solver. Raises a ``ValueError``
    carrying the solver message when that message contains "NOT" without
    also containing "feasible solutions found".
    """
    logger.debug(f"Creating Solver!")

    # The thread count is deliberately left untouched for both backends:
    # with Gurobi, setting it sometimes causes an error (see issue #5 on
    # github.com/funkey/pylp); with Scip it leads to a core dump.
    preference = (
        pylp.Preference.Gurobi if self.use_gurobi else pylp.Preference.Scip
    )
    solver = pylp.create_linear_solver(preference)

    solver.initialize(self.num_variables, pylp.VariableType.Binary)
    solver.set_timeout(self.timeout)
    solver.set_objective(self.objective)

    logger.debug(f"Starting Solve: {self.num_variables} indicators "
                 f"and {self.num_constraints} constraints!")

    solver.set_constraints(self.constraints)
    outcome = solver.solve()
    solution, message = outcome
    logger.debug(f"Finished solving!, got message ({message})")

    # A "NOT ..." message is tolerated only if feasible solutions exist.
    acceptable = ("NOT" not in message
                  or "feasible solutions found" in message)
    if acceptable:
        return solution
    raise ValueError(message)
def gurobi_installed_with_license():
    """Build a pytest ``skipif`` marker for tests that require Gurobi.

    A one-variable problem is set up and solved with the Gurobi backend.
    If any of those steps raises ``RuntimeError`` the probe counts as
    failed.

    Returns:
        A ``pytest.mark.skipif`` marker whose condition is true (i.e. the
        test is skipped) when the probe failed.
    """
    try:
        solver = pylp.create_linear_solver(pylp.Preference.Gurobi)
        solver.initialize(1, pylp.VariableType.Binary)
        objective = pylp.LinearObjective(1)
        # Variables are 0-indexed: the only variable of a size-1 problem
        # is index 0 (index 1 would be out of range).
        objective.set_coefficient(0, 1)
        solver.set_objective(objective)
        solver.set_constraints(pylp.LinearConstraints())
        solver.solve()
    except RuntimeError:
        success = False
    else:
        success = True
    return pytest.mark.skipif(not success, reason="Requires Gurobi License")
def find_optimal_split(synapse_ids,
                       superset_by_synapse_id,
                       nt_by_synapse_id,
                       neurotransmitters,
                       supersets,
                       train_fraction=0.8,
                       ensure_non_empty=True):
    """Find optimal synapse split per neurotransmitter and synapse superset
    (e.g. hemi lineage/neuron/brain region)

    Each superset is assigned as a whole to either train or test (the same
    choice for all neurotransmitters), such that for every neurotransmitter
    the number of training synapses is as close as possible to
    ``train_fraction`` of its synapses. The problem is solved as an integer
    linear program with the Gurobi backend; the solver message and a
    per-neurotransmitter report are printed to stdout.

    Args:

        synapse_ids (List): Synapse ids to consider

        superset_by_synapse_id (dict): Mapping from each synapse_id in
            synapse_ids to its associated superset.

        nt_by_synapse_id (dict): Mapping from each synapse_id in
            synapse_ids to its associated nt.

        neurotransmitters (List of tuples): List of neurotransmitters to
            consider.

        supersets (List of objects): List of supersets to consider.

        train_fraction (float): Fraction of synapses to assign to training

        ensure_non_empty (bool): If ``True``, every neurotransmitter that
            occurs in more than one superset is required to have at least
            one of those supersets in train and at least one in test.

    Returns:

        Tuple ``(train_synapses_by_ss, test_synapses_by_ss)`` of dicts
        mapping a superset to the list of its synapse ids assigned to
        train and test, respectively. Supersets without synapses do not
        appear.
    """

    # Construct combined dicts: synapse ids grouped by (superset, nt) and
    # by nt alone
    synapses_by_superset_and_nt = {(ss, nt): []
                                   for ss in supersets
                                   for nt in neurotransmitters}
    synapse_ids_by_nt = {nt: [] for nt in neurotransmitters}
    for synapse_id in synapse_ids:
        ss = superset_by_synapse_id[synapse_id]
        nt = nt_by_synapse_id[synapse_id]
        synapses_by_superset_and_nt[(ss, nt)].append(synapse_id)
        synapse_ids_by_nt[nt].append(synapse_id)

    # find optimal train/test split
    num_variables = 0
    train_indicators = {}
    target = {}
    sum_synapses = {}
    slack_u = {}
    slack_l = {}

    constraints = pylp.LinearConstraints()

    # for each NT:
    for nt in neurotransmitters:

        # compute target value: target_NT
        target[nt] = int(train_fraction * len(synapse_ids_by_nt[nt]))

        # let s_NT be sum of synapses in training for NT
        sum_synapses[nt] = num_variables
        num_variables += 1

        # s_NT is tied to the indicators by:
        #   sum_HL(i_HL_NT * #_of_synapses(HL, NT)) - s_NT = 0
        sum_constraint = pylp.LinearConstraint()

        # for each HL:
        for ss in supersets:

            # add indicator for using (HL, NT) in train: i_HL_NT
            i = num_variables
            num_variables += 1
            train_indicators[(ss, nt)] = i

            sum_constraint.set_coefficient(
                i, len(synapses_by_superset_and_nt[(ss, nt)]))

        sum_constraint.set_coefficient(sum_synapses[nt], -1)
        sum_constraint.set_relation(pylp.Relation.Equal)
        sum_constraint.set_value(0)
        constraints.add(sum_constraint)

        # add two slack variables for s_NT:
        slack_u[nt] = num_variables
        num_variables += 1
        slack_l[nt] = num_variables
        num_variables += 1

        # The slacks bound the deviation d_NT = s_NT - target_NT from
        # both sides:
        #
        #   su_NT >=  d_NT  <=>   s_NT - su_NT <=  target_NT,  su_NT >= 0
        #   sl_NT >= -d_NT  <=>  -s_NT - sl_NT <= -target_NT,  sl_NT >= 0

        slack_constraint_u = pylp.LinearConstraint()
        slack_constraint_l = pylp.LinearConstraint()
        slack_constraint_u_0 = pylp.LinearConstraint()
        slack_constraint_l_0 = pylp.LinearConstraint()

        slack_constraint_u.set_coefficient(sum_synapses[nt], 1)
        slack_constraint_u.set_coefficient(slack_u[nt], -1)
        slack_constraint_u.set_relation(pylp.Relation.LessEqual)
        slack_constraint_u.set_value(target[nt])

        slack_constraint_u_0.set_coefficient(slack_u[nt], 1)
        slack_constraint_u_0.set_relation(pylp.Relation.GreaterEqual)
        slack_constraint_u_0.set_value(0)

        slack_constraint_l.set_coefficient(sum_synapses[nt], -1)
        slack_constraint_l.set_coefficient(slack_l[nt], -1)
        slack_constraint_l.set_relation(pylp.Relation.LessEqual)
        slack_constraint_l.set_value(-target[nt])

        slack_constraint_l_0.set_coefficient(slack_l[nt], 1)
        slack_constraint_l_0.set_relation(pylp.Relation.GreaterEqual)
        slack_constraint_l_0.set_value(0)

        constraints.add(slack_constraint_u)
        constraints.add(slack_constraint_l)
        constraints.add(slack_constraint_u_0)
        constraints.add(slack_constraint_l_0)

    # ensure that either all or none of the NTs per hemi-lineages are used
    # for training (chain of equalities between consecutive NTs)
    for ss in supersets:
        prev_nt = None
        for nt in neurotransmitters:
            if prev_nt is not None:
                joint_constraint = pylp.LinearConstraint()
                joint_constraint.set_coefficient(
                    train_indicators[(ss, prev_nt)], 1)
                joint_constraint.set_coefficient(
                    train_indicators[(ss, nt)], -1)
                joint_constraint.set_relation(pylp.Relation.Equal)
                joint_constraint.set_value(0)
                constraints.add(joint_constraint)
            prev_nt = nt

    # Ensure that at least one superset is in test (and one in train) for
    # each nt
    if ensure_non_empty:
        for nt in neurotransmitters:
            # Supersets that actually contain synapses of this nt
            occupied = [ss for ss in supersets
                        if synapses_by_superset_and_nt[(ss, nt)]]

            # Only if there is more than one such superset can both train
            # and test be non-empty
            if len(occupied) > 1:
                non_zero_constraint = pylp.LinearConstraint()
                non_one_constraint = pylp.LinearConstraint()
                for ss in occupied:
                    non_zero_constraint.set_coefficient(
                        train_indicators[(ss, nt)], 1)
                    non_one_constraint.set_coefficient(
                        train_indicators[(ss, nt)], 1)

                non_zero_constraint.set_relation(pylp.Relation.GreaterEqual)
                non_zero_constraint.set_value(1)
                non_one_constraint.set_relation(pylp.Relation.LessEqual)
                non_one_constraint.set_value(len(occupied) - 1)

                constraints.add(non_zero_constraint)
                constraints.add(non_one_constraint)

    # add sl_NT + su_NT to objective, normalized by the target so that
    # every NT contributes a relative deviation
    objective = pylp.LinearObjective(num_variables)
    for nt in neurotransmitters:
        # target is 0 for NTs with (almost) no synapses; fall back to a
        # weight of 1 instead of dividing by zero
        weight = 1. / max(target[nt], 1)
        objective.set_coefficient(slack_u[nt], weight)
        objective.set_coefficient(slack_l[nt], weight)

    # Everything is binary except the sums and slacks, which are integers
    variable_types = pylp.VariableTypeMap()
    for nt in neurotransmitters:
        variable_types[slack_u[nt]] = pylp.VariableType.Integer
        variable_types[slack_l[nt]] = pylp.VariableType.Integer
        variable_types[sum_synapses[nt]] = pylp.VariableType.Integer

    solver = pylp.create_linear_solver(pylp.Preference.Gurobi)
    solver.initialize(num_variables, pylp.VariableType.Binary,
                      variable_types)
    solver.set_objective(objective)
    solver.set_constraints(constraints)
    solution, msg = solver.solve()
    print(msg)

    train_synapses_by_ss = {}
    test_synapses_by_ss = {}
    for nt in neurotransmitters:
        num_nt = len(synapse_ids_by_nt[nt])
        num_train = solution[sum_synapses[nt]]
        # Guard against NTs without any synapse
        train_share = float(num_train) / num_nt if num_nt else 0.0
        print(nt, train_share, "% ", num_train, '/', num_nt,
              '(', target[nt], ')')

        for ss in supersets:
            members = synapses_by_superset_and_nt[(ss, nt)]
            if not members:
                continue
            if solution[train_indicators[(ss, nt)]] > 0.5:
                train_synapses_by_ss.setdefault(ss, []).extend(members)
                print('+', ss)
            else:
                test_synapses_by_ss.setdefault(ss, []).extend(members)
                print('-', ss)

    return train_synapses_by_ss, test_synapses_by_ss
def initialize(self):
    """Create indicators and set up objective and constraints.

    Steps, each timed and logged at INFO level:

    1. create the indicators and collect the continuation constraints
       (delegated to private helpers),
    2. create a single-threaded Gurobi backend with one binary variable
       per triplet, set the triplet costs as objective and pin every
       triplet in ``self.t_selected`` to 1,
    3. add "at most one" constraints for center conflicts,
    4. add the continuation (left sum == right sum) constraints,
    5. add "at least one" constraints for ``self.e_selected`` and
       ``self.v_selected``.

    Conflict and continuation constraints whose triplets are all in
    ``self.t_solved`` are skipped.
    """
    logger.info("Creating Indicators...")
    start_time = time.time()
    self.__create_indicators()
    logger.info("...took %s seconds", time.time() - start_time)

    logger.info("Getting continuation constraints...")
    start_time = time.time()
    self.__get_continuation_constraints()
    logger.info("...took %s seconds", time.time() - start_time)

    logger.info("Setting objective...")
    start_time = time.time()
    self.backend = pylp.create_linear_solver(pylp.Preference.Gurobi)
    self.backend.initialize(self.n_triplets, pylp.VariableType.Binary)
    self.backend.set_num_threads(1)
    self.objective = pylp.LinearObjective(self.n_triplets)

    self.constraints = pylp.LinearConstraints()
    for t in self.triplets.keys():
        self.objective.set_coefficient(t, self.get_cost(t))

        # Already selected triplets are pinned to 1
        if t in self.t_selected:
            constraint = pylp.LinearConstraint()
            constraint.set_coefficient(t, 1)
            constraint.set_relation(pylp.Relation.Equal)
            constraint.set_value(1)
            self.constraints.add(constraint)

    self.backend.set_objective(self.objective)
    logger.info("...took %s seconds", time.time() - start_time)

    logger.info("Setting center conflicts...")
    start_time = time.time()
    for conflict in self.t_center_conflicts:
        constraint = pylp.LinearConstraint()

        # Single pass: collect coefficients and detect unsolved triplets
        all_solved = True
        for t in conflict:
            if t not in self.t_solved:
                all_solved = False
            constraint.set_coefficient(t, 1)

        if not all_solved:
            constraint.set_relation(pylp.Relation.LessEqual)
            constraint.set_value(1)
            self.constraints.add(constraint)
    logger.info("...took %s seconds", time.time() - start_time)

    logger.info("Setting continuation constraints...")
    start_time = time.time()
    for continuation_constraint in self.continuation_constraints:
        t_l = continuation_constraint["t_l"]
        t_r = continuation_constraint["t_r"]

        all_solved = all(t in self.t_solved for t in (t_l + t_r))
        if not all_solved:
            # sum(t_l) - sum(t_r) == 0
            constraint = pylp.LinearConstraint()
            for t in t_l:
                constraint.set_coefficient(t, 1)
            for t in t_r:
                constraint.set_coefficient(t, -1)
            constraint.set_relation(pylp.Relation.Equal)
            constraint.set_value(0)
            self.constraints.add(constraint)
    logger.info("...took %s seconds", time.time() - start_time)

    logger.info("Setting must pick one constraints...")
    start_time = time.time()
    for must_pick_one in self.e_selected + self.v_selected:
        # Empty candidate sets impose no constraint
        if must_pick_one:
            constraint = pylp.LinearConstraint()
            for t in must_pick_one:
                constraint.set_coefficient(t, 1)
            constraint.set_relation(pylp.Relation.GreaterEqual)
            constraint.set_value(1)
            self.constraints.add(constraint)

    self.backend.set_constraints(self.constraints)
    logger.info("...took %s seconds", time.time() - start_time)
def __init__(self,
             g1,
             distance_factor,
             orientation_factor,
             start_edge_prior,
             comb_angle_factor,
             vertex_selection_cost,
             backend="Gurobi"):
    """Set up solver backend, objective and constraints for graph ``g1``.

    One binary variable is created per vertex, per edge, per vertex
    "dummy" (start) edge and per combination of edges, in that order.

    Args:
        g1: Graph providing vertices, edges, costs and partners through
            the accessor methods called below. Its edges are reindexed
            via ``g1.reindex_edges_save()``.
        distance_factor, orientation_factor, start_edge_prior:
            Forwarded to ``g1.get_edge_cost``.
        comb_angle_factor: Forwarded to ``g1.get_edge_combination_cost``.
        vertex_selection_cost: Added to each vertex cost in the objective.
        backend (str): ``"Gurobi"`` (default) or ``"Scip"``.

    Raises:
        NotImplementedError: If ``backend`` is neither of the two.
    """
    if backend == "Gurobi":
        logger.info("Use Gurobi backend")
        self.backend = pylp.create_linear_solver(pylp.Preference.Gurobi)
    elif backend == "Scip":
        logger.info("Use Scip backend")
        self.backend = pylp.create_linear_solver(pylp.Preference.Scip)
    else:
        raise NotImplementedError("Choose between Gurobi or Scip backend")

    g1.reindex_edges_save()

    self.g1 = g1
    self.distance_factor = distance_factor
    self.orientation_factor = orientation_factor
    self.start_edge_prior = start_edge_prior
    self.comb_angle_factor = comb_angle_factor
    self.vertex_selection_cost = vertex_selection_cost

    self.vertex_cost = g1.get_vertex_cost()
    self.edge_cost = g1.get_edge_cost(distance_factor,
                                      orientation_factor,
                                      start_edge_prior)

    self.edge_combination_cost, self.edges_to_middle =\
        g1.get_edge_combination_cost(comb_angle_factor=comb_angle_factor,
                                     return_edges_to_middle=True)

    self.n_vertices = g1.get_number_of_vertices()
    # One dummy edge per vertex
    self.n_dummy = self.n_vertices
    self.n_edges = g1.get_number_of_edges() + self.n_dummy
    self.n_comb_edges = len(self.edge_combination_cost)

    # Variables are vertices, edges and combination of edges
    self.n_variables = self.n_vertices + self.n_edges + self.n_comb_edges
    self.backend.initialize(self.n_variables, pylp.VariableType.Binary)

    self.objective = pylp.LinearObjective(self.n_variables)

    #
    # Set costs
    #
    binary_id = 0

    # Add one variable for each vertex (selection cost only for mt's)
    self.vertex_to_binary = {}
    self.binary_to_vertex = {}
    for v in g1.get_vertex_iterator():
        self.objective.set_coefficient(
            binary_id, self.vertex_selection_cost + self.vertex_cost[v])

        self.vertex_to_binary[v] = binary_id
        self.binary_to_vertex[binary_id] = v
        binary_id += 1

    assert (binary_id == self.n_vertices)

    # Add one variable for each edge
    self.edge_to_binary = {}
    self.binary_to_edge = {}
    for e in g1.get_edge_iterator():
        self.objective.set_coefficient(binary_id, self.edge_cost[e])

        self.edge_to_binary[e] = binary_id
        self.binary_to_edge[binary_id] = e
        binary_id += 1

    # Add one dummy edge for each vertex
    self.dummy_to_binary = {}
    self.binary_to_dummy = {}
    for v in g1.get_vertex_iterator():
        self.objective.set_coefficient(binary_id,
                                       self.edge_cost[G1.START_EDGE])

        self.dummy_to_binary[v] = binary_id
        self.binary_to_dummy[binary_id] = v
        binary_id += 1

    assert (binary_id == self.n_vertices + self.n_edges)

    # Add one variable for each combination of edges:
    self.comb_to_binary = {}
    self.binary_to_comb = {}
    # .items() instead of the Python-2-only .iteritems()
    for ee, cost in self.edge_combination_cost.items():
        self.objective.set_coefficient(binary_id, cost)

        self.comb_to_binary[ee] = binary_id
        self.binary_to_comb[binary_id] = ee
        binary_id += 1

    assert (binary_id == self.n_variables)

    self.backend.set_objective(self.objective)

    #
    # Constraints
    #
    self.constraints = pylp.LinearConstraints()

    # Edge selection implies vertex selection:
    #   2 * e - v0 - v1 <= 0
    for e in g1.get_edge_iterator():
        v0 = e.source()
        v1 = e.target()

        constraint = pylp.LinearConstraint()
        constraint.set_coefficient(self.edge_to_binary[e], 2)
        constraint.set_coefficient(self.vertex_to_binary[v0], -1)
        constraint.set_coefficient(self.vertex_to_binary[v1], -1)
        constraint.set_relation(pylp.Relation.LessEqual)
        constraint.set_value(0)
        self.constraints.add(constraint)

    # Vertex selection implies 2 edges (the dummy edge counts as one):
    #   2 * v - dummy_v - sum(incident edges) == 0
    for v in g1.get_vertex_iterator():
        incident_edges = g1.get_incident_edges(v)

        constraint = pylp.LinearConstraint()
        constraint.set_coefficient(self.vertex_to_binary[v], 2)
        constraint.set_coefficient(self.dummy_to_binary[v], -1)
        for e in incident_edges:
            constraint.set_coefficient(self.edge_to_binary[e], -1)
        constraint.set_relation(pylp.Relation.Equal)
        constraint.set_value(0)
        self.constraints.add(constraint)

    # Combination of 2 edges implies edges and vice versa:
    for ee in self.edge_combination_cost.keys():
        e0 = ee[0]
        e1 = ee[1]

        # At most one of the two edges may be the start edge
        assert (e0 != G1.START_EDGE or e1 != G1.START_EDGE)

        # A start edge is represented by the dummy edge of the middle
        # vertex of the combination
        if e0 == G1.START_EDGE:
            middle_vertex = self.edges_to_middle[ee]
            b0 = self.dummy_to_binary[middle_vertex]
            b1 = self.edge_to_binary[e1]
        elif e1 == G1.START_EDGE:
            middle_vertex = self.edges_to_middle[ee]
            b0 = self.edge_to_binary[e0]
            b1 = self.dummy_to_binary[middle_vertex]
        else:
            b0 = self.edge_to_binary[e0]
            b1 = self.edge_to_binary[e1]

        # Combination implies edges: 2 * comb - b0 - b1 <= 0
        constraint = pylp.LinearConstraint()
        constraint.set_coefficient(self.comb_to_binary[ee], 2)
        constraint.set_coefficient(b0, -1)
        constraint.set_coefficient(b1, -1)
        constraint.set_relation(pylp.Relation.LessEqual)
        constraint.set_value(0)
        self.constraints.add(constraint)

        # Edges implies combination: b0 + b1 - comb <= 1
        constraint = pylp.LinearConstraint()
        constraint.set_coefficient(b0, 1)
        constraint.set_coefficient(b1, 1)
        constraint.set_coefficient(self.comb_to_binary[ee], -1)
        constraint.set_relation(pylp.Relation.LessEqual)
        constraint.set_value(1)
        self.constraints.add(constraint)

    # Add partner constraints: a vertex and its partner exclude each
    # other. ``v < partner`` adds each pair only once; -1 means "no
    # partner".
    for v in g1.get_vertex_iterator():
        partner = g1.get_partner(v)
        if partner != -1 and v < partner:
            constraint = pylp.LinearConstraint()
            constraint.set_coefficient(self.vertex_to_binary[v], 1)
            constraint.set_coefficient(self.vertex_to_binary[partner], 1)
            constraint.set_relation(pylp.Relation.LessEqual)
            constraint.set_value(1)
            self.constraints.add(constraint)

    self.backend.set_constraints(self.constraints)
def __init__(self, g2, backend="Gurobi"):
    """Build objective and constraints for graph ``g2`` on a new backend.

    One binary variable per vertex of ``g2`` is used. The objective holds
    the vertex costs; constraints pin forced vertices to 1, allow at most
    one vertex per conflict, equate the two sides of every sum constraint
    and require at least one vertex of every non-empty "must pick one"
    set. Conflicts and sum constraints whose vertices are all solved are
    left out.

    ``backend`` is ``"Gurobi"`` (default) or ``"Scip"``; anything else
    raises ``NotImplementedError``.
    """
    self.g2_vertices_N = g2.get_number_of_vertices()

    if backend == "Gurobi":
        logger.info("Use Gurobi backend")
        chosen = pylp.Preference.Gurobi
    elif backend == "Scip":
        logger.info("Use Scip backend")
        chosen = pylp.Preference.Scip
    else:
        raise NotImplementedError("Choose between Gurobi or Scip backend")
    self.backend = pylp.create_linear_solver(chosen)

    self.backend.initialize(self.g2_vertices_N, pylp.VariableType.Binary)
    self.backend.set_num_threads(1)
    self.objective = pylp.LinearObjective(self.g2_vertices_N)

    index_of = g2.get_vertex_index_map()

    self.constraints = pylp.LinearConstraints()

    # Vertex costs, plus "== 1" for forced vertices.
    # NOTE(review): the objective addresses variables through the vertex
    # index map while all constraints use the vertex itself -- confirm
    # that both resolve to the same variable index.
    for vertex in g2.get_vertex_iterator():
        self.objective.set_coefficient(index_of[vertex],
                                       g2.get_cost(vertex))

        forced = pylp.LinearConstraint()
        if not g2.get_forced(vertex):
            continue
        forced.set_coefficient(vertex, 1)
        forced.set_relation(pylp.Relation.Equal)
        forced.set_value(1)
        self.constraints.add(forced)

    self.backend.set_objective(self.objective)

    # Conflicts: at most one vertex of each conflict set
    for conflict in g2.get_conflicts():
        at_most_one = pylp.LinearConstraint()

        any_unsolved = False
        for vertex in conflict:
            if not g2.get_solved(vertex):
                any_unsolved = True
            at_most_one.set_coefficient(vertex, 1)

        if any_unsolved:
            at_most_one.set_relation(pylp.Relation.LessEqual)
            at_most_one.set_value(1)
            self.constraints.add(at_most_one)

    # Sum constraints: sum(first group) - sum(second group) == 0
    for sum_constraint in g2.get_sum_constraints():
        plus_side = sum_constraint[0]
        minus_side = sum_constraint[1]

        balance = pylp.LinearConstraint()
        for vertex in plus_side:
            balance.set_coefficient(vertex, 1)
        for vertex in minus_side:
            balance.set_coefficient(vertex, -1)

        # Stops at the first unsolved vertex
        if all(g2.get_solved(vertex) for vertex in plus_side + minus_side):
            continue

        balance.set_relation(pylp.Relation.Equal)
        balance.set_value(0)
        self.constraints.add(balance)

    # Must pick one: at least one vertex of each non-empty set
    for candidates in g2.get_must_pick_one():
        at_least_one = pylp.LinearConstraint()
        if not candidates:
            continue
        for vertex in candidates:
            at_least_one.set_coefficient(vertex, 1)
        at_least_one.set_relation(pylp.Relation.GreaterEqual)
        at_least_one.set_value(1)
        self.constraints.add(at_least_one)

    self.backend.set_constraints(self.constraints)
def __init__(self, graph_in, classA, classB, classC):
    """Set up a Scip problem assigning one leaf and one class per node.

    Variables (all binary), numbered consecutively:

    * per graph node: one variable per leaf, followed by three class
      indicators (A, B, C),
    * after all nodes: one variable per graph edge.

    Args:
        graph_in: Graph whose ``nodes[v]['cost']`` is a sequence with one
            cost per leaf. The number of leaves is read from node ``0``,
            which therefore has to exist.
        classA, classB, classC: Iterables of leaf indices (positions in a
            node's cost sequence) that make up class A, B and C.
    """
    self.graph = graph_in
    self.backend = pylp.create_linear_solver(pylp.Preference.Scip)

    num_classes = 3
    graph_nodes = list(self.graph.nodes)
    graph_edges = list(self.graph.edges)
    num_leaves = len(self.graph.nodes[0]['cost'])

    # Each node owns its leaf variables plus the class indicators
    num_node_vars = len(graph_nodes) * (num_leaves + num_classes)
    num_vars = num_node_vars + len(graph_edges)

    self.backend.initialize(num_vars, pylp.VariableType.Binary)
    objective = pylp.LinearObjective(num_vars)

    print('adding objective node coefficients')
    # add variables to objective per vertex
    var_id = 0
    self.vertex_id = []
    self.vertex_class_id = []
    for v in graph_nodes:
        node_costs = self.graph.nodes[v]['cost']

        leaf_ids = []
        for leaf in range(num_leaves):
            objective.set_coefficient(var_id, -1 * node_costs[leaf])
            leaf_ids.append(var_id)
            var_id += 1

        # add indicator variables for class A,B,C as well
        class_ids = []
        for _ in range(num_classes):
            objective.set_coefficient(var_id, 1)
            class_ids.append(var_id)
            var_id += 1

        self.vertex_id.append(leaf_ids)
        self.vertex_class_id.append(class_ids)

    assert (var_id == num_node_vars)

    print('adding objective edge coefficients')
    self.edge_nodes = []
    self.obj_edge_id = []
    for e in graph_edges:
        objective.set_coefficient(var_id, 1)
        self.edge_nodes.append(e)
        self.obj_edge_id.append(var_id)
        var_id += 1

    assert (var_id == num_vars)

    print('adding constraints')
    self.constraints = pylp.LinearConstraints()

    # add node constraints: exactly one leaf and exactly one class per
    # node. Explicit 0 <= x <= 1 bounds are not needed, all variables
    # are declared binary.
    for id_groups in (self.vertex_id, self.vertex_class_id):
        for ids in id_groups:
            pick_one = pylp.LinearConstraint()
            for i in ids:
                pick_one.set_coefficient(i, 1)
            pick_one.set_relation(pylp.Relation.Equal)
            pick_one.set_value(1)
            self.constraints.add(pick_one)

    # add node constraints: node leaf sets node class, i.e. for every
    # node and class: sum(leaves of that class) - class indicator == 0
    class_members = (classA, classB, classC)
    for leaf_ids, class_ids in zip(self.vertex_id, self.vertex_class_id):
        for class_index, members in enumerate(class_members):
            coupling = pylp.LinearConstraint()
            for leaf in members:
                coupling.set_coefficient(leaf_ids[leaf], 1)
            coupling.set_coefficient(class_ids[class_index], -1)
            coupling.set_relation(pylp.Relation.Equal)
            coupling.set_value(0)
            self.constraints.add(coupling)

    # add edge constraints: edge can't span class A(lumen) to class
    # C(cytosol), in either direction: the two indicators must not both
    # be set (sum <= 1).
    # TODO: double check
    node_position = {node: i for i, node in enumerate(graph_nodes)}
    class_a, class_c = 0, 2
    for e in graph_edges:
        start_idx = node_position[e[0]]
        end_idx = node_position[e[1]]

        for first, second in ((class_a, class_c), (class_c, class_a)):
            no_span = pylp.LinearConstraint()
            no_span.set_coefficient(
                self.vertex_class_id[start_idx][first], 1)
            no_span.set_coefficient(
                self.vertex_class_id[end_idx][second], 1)
            no_span.set_relation(pylp.Relation.LessEqual)
            no_span.set_value(1)
            self.constraints.add(no_span)

    # TODO: add constraint edges with different classes are cut

    self.backend.set_objective(objective)
    self.backend.set_constraints(self.constraints)
def match_components(
        nodes_x, nodes_y,
        edges_xy,
        node_labels_x, node_labels_y,
        allow_many_to_many=False,
        edge_costs=None,
        no_match_costs=0,
        edge_conflicts=None):
    r'''Match nodes from X to nodes from Y by selecting candidate edges x <->
    y, such that the split/merge error induced from the labels for X and Y is
    minimized.

    Example::

        X:     Y:

        1      a
        |      |
        2      b
        |       \
        3      h c
        |      | |
        4    C i d
        |      | |
        5      j e
        |       /
        6      f
        |      |
        7      g

        A      B

    1-7: nodes in X labelled A; a-g: nodes in Y labelled B; h-j: nodes in Y
    labelled C.

    Assuming that all nodes in X can be matched to all nodes in Y in the same
    line (``edges_xy`` would be (1, a), (2, b), (3, h), (3, c), and so on), the
    solution would be to match:

        1 - a
        2 - b
        3 - c
        4 - d
        5 - e
        6 - f
        7 - g

    h, i, and j would remain unmatched, since matching them would incur a split
    error of A into B and C.

    None of the passed sequences is modified.

    Args:

        nodes_x, nodes_y (array-like of ``int``):

            A list of IDs of set X and Y, respectively.

        edges_xy (array-like of tuple):

            A list of tuples ``(id_x, id_y)`` of matching edges to choose
            from.

        node_labels_x, node_labels_y (``dict``):

            A dictionary from IDs to labels.

        allow_many_to_many (``bool``, optional):

            If ``True``, allow that one node in X can match to multiple nodes
            in Y and vice versa. Default is ``False``.

        edge_costs (array-like of ``float``, optional):

            If given, defines a preference for selecting edges from
            ``edges_xy`` by contributing costs ``edge_costs[i]`` for edge
            ``edges_xy[i]``.

            The edge costs form a secondary objective, i.e., the matching is
            still performed to minimize the total number of errors (splits,
            merges, FPs, and FNs). However, for matching problems where
            several solutions exist with the same number of errors, the edge
            costs define a preference (e.g., by favouring matches between
            nodes that are spatially close, if the edge costs represent
            distances).

            See also ``no_match_costs``.

        no_match_costs (``float``, optional):

            A cost for not matching a node either in X or Y. Complementary to
            ``edge_costs``.

        edge_conflicts (``list of lists of tuples (id_x, id_y)`` of edges_xy,
            optional):

            Each list in edge conflicts should contain edges_xy that are in
            conflict with each other. That is, for each set of edges
            ``edge_conflicts[i]`` at most one edge is picked.

    Returns:

        (label_matches, node_matches, num_splits, num_merges, num_fps, num_fns)

        ``label_matches``: A list of tuples ``(label_x, label_y)`` of labels
        that got matched.

        ``node_matches``: A list of tuples ``(id_x, id_y)`` of nodes that got
        matched. Subset of ``edges_xy``.

        ``num_splits``, ``num_merges``, ...: The number of label splits,
        merges, false positives (unmatched in X), and false negatives
        (unmatched in Y).

    Raises:

        RuntimeError: If the solver message contains ``'NOT'``.
    '''

    # Work on copies: the dummy edges (and their costs) added below must
    # not leak into the caller's sequences
    nodes_x = list(nodes_x)
    nodes_y = list(nodes_y)
    edges_xy = list(edges_xy)

    if edge_costs is None and no_match_costs != 0:
        edge_costs = [0] * len(edges_xy)

    num_vars = 0

    # add "no match in X" and "no match in Y" dummy nodes
    no_match_node = max(nodes_x + nodes_y) + 1
    no_match_label = max(
        max(node_labels_x.keys()),
        max(node_labels_y.keys())) + 1

    node_labels_x = dict(node_labels_x)
    node_labels_y = dict(node_labels_y)
    node_labels_x.update({no_match_node: no_match_label})
    node_labels_y.update({no_match_node: no_match_label})

    labels_x = set(node_labels_x.values())
    labels_y = set(node_labels_y.values())

    # add additional edges to dummy nodes
    edges_xy += [(n, no_match_node) for n in nodes_x]
    edges_xy += [(no_match_node, n) for n in nodes_y]

    # create indicator for each matching edge
    edge_indicators = {}
    edges_by_node_x = {}
    edges_by_node_y = {}
    for edge in edges_xy:
        edge_indicators[edge] = num_vars
        num_vars += 1
        u, v = edge
        edges_by_node_x.setdefault(u, []).append(edge)
        edges_by_node_y.setdefault(v, []).append(edge)

    # Require that each node matches to exactly one (or at least one,
    # depending on the allow_many_to_many parameter) other node. Dummy nodes
    # can match to any number.

    constraints = pylp.LinearConstraints()

    if allow_many_to_many:
        match_relation = pylp.Relation.GreaterEqual
    else:
        match_relation = pylp.Relation.Equal

    for nodes, edges_by_node in zip(
            [nodes_x, nodes_y], [edges_by_node_x, edges_by_node_y]):

        for node in nodes:

            if node == no_match_node:
                continue

            constraint = pylp.LinearConstraint()
            for edge in edges_by_node[node]:
                constraint.set_coefficient(edge_indicators[edge], 1)
            constraint.set_relation(match_relation)
            constraint.set_value(1)
            constraints.add(constraint)

    # add indicators for label matches
    label_indicators = {}
    edges_by_label_pair = {}
    for edge in edges_xy:
        label_pair = node_labels_x[edge[0]], node_labels_y[edge[1]]
        if label_pair not in label_indicators:
            label_indicators[label_pair] = num_vars
            num_vars += 1
        edges_by_label_pair.setdefault(label_pair, []).append(edge)

    # No edge connects the two dummy nodes, so this pair needs its own
    # indicator
    label_indicators[(no_match_label, no_match_label)] = num_vars
    num_vars += 1

    # couple label indicators to edge indicators
    for label_pair, edges in edges_by_label_pair.items():

        # y == 1 <==> sum(x1, ..., xn) > 0
        #
        # y - sum(x1, ..., xn) <= 0
        # sum(x1, ..., xn) - n*y <= 0

        constraint1 = pylp.LinearConstraint()
        constraint2 = pylp.LinearConstraint()
        constraint1.set_coefficient(label_indicators[label_pair], 1)
        constraint2.set_coefficient(label_indicators[label_pair],
                                    -len(edges))
        for edge in edges:
            constraint1.set_coefficient(edge_indicators[edge], -1)
            constraint2.set_coefficient(edge_indicators[edge], 1)
        constraint1.set_relation(pylp.Relation.LessEqual)
        constraint2.set_relation(pylp.Relation.LessEqual)
        constraint1.set_value(0)
        constraint2.set_value(0)
        constraints.add(constraint1)
        constraints.add(constraint2)

    # at most one edge of every conflict set
    if edge_conflicts is not None:
        for conflict in edge_conflicts:
            constraint = pylp.LinearConstraint()
            for edge in conflict:
                constraint.set_coefficient(edge_indicators[tuple(edge)], 1)
            constraint.set_relation(pylp.Relation.LessEqual)
            constraint.set_value(1)
            constraints.add(constraint)

    # pin no-match pair indicator to 1
    constraint = pylp.LinearConstraint()
    no_match_indicator = label_indicators[(no_match_label, no_match_label)]
    constraint.set_coefficient(no_match_indicator, 1)
    constraint.set_relation(pylp.Relation.Equal)
    constraint.set_value(1)
    constraints.add(constraint)

    # add integer for splits
    #   splits = sum of all label pair indicators - n
    # with n number of labels in x (including no-match)
    #   sum - splits = n
    splits = num_vars
    num_vars += 1
    constraint = pylp.LinearConstraint()
    for label_indicator in label_indicators.values():
        constraint.set_coefficient(label_indicator, 1)
    constraint.set_coefficient(splits, -1)
    constraint.set_relation(pylp.Relation.Equal)
    constraint.set_value(len(labels_x))
    constraints.add(constraint)

    # add integer for merges (same as splits, with the labels in y)
    merges = num_vars
    num_vars += 1
    constraint = pylp.LinearConstraint()
    for label_indicator in label_indicators.values():
        constraint.set_coefficient(label_indicator, 1)
    constraint.set_coefficient(merges, -1)
    constraint.set_relation(pylp.Relation.Equal)
    constraint.set_value(len(labels_y))
    constraints.add(constraint)

    # set objective
    objective = pylp.LinearObjective(num_vars)
    objective.set_coefficient(splits, 1)
    objective.set_coefficient(merges, 1)
    min_edge_cost = None
    if edge_costs is not None:

        edge_costs, no_match_costs = normalize_matching_costs(
            len(nodes_x), len(nodes_y),
            edge_costs, no_match_costs)

        # Costs of the dummy edges, in the order the edges were appended.
        # A new list is built so the caller's costs stay untouched.
        edge_costs = list(edge_costs) + \
            [no_match_costs] * (len(nodes_x) + len(nodes_y))
        min_edge_cost = min(edge_costs)

        for edge, cost in zip(edges_xy, edge_costs):
            objective.set_coefficient(edge_indicators[edge], cost)

    # solve

    logger.debug("Added %d constraints", len(constraints))
    for i in range(len(constraints)):
        logger.debug(constraints[i])

    logger.debug("Creating linear solver")
    solver = pylp.create_linear_solver(pylp.Preference.Any)
    variable_types = pylp.VariableTypeMap()
    variable_types[splits] = pylp.VariableType.Integer
    variable_types[merges] = pylp.VariableType.Integer

    if min_edge_cost is None:
        logger.debug("Set optimality gap to zero")
        solver.set_optimality_gap(0.0, True)
    else:
        logger.debug("Set optimality gap to lowest edge cost")
        epsilon = 10**(-4)
        solver.set_optimality_gap(max([min_edge_cost - epsilon, 0.0]), True)

    logger.debug("Initializing solver with %d variables", num_vars)
    solver.initialize(num_vars, pylp.VariableType.Binary, variable_types)

    logger.debug("Setting objective")
    solver.set_objective(objective)

    logger.debug("Setting constraints")
    solver.set_constraints(constraints)

    logger.debug("Solving...")
    solution, message = solver.solve()

    logger.debug("Solver returned: %s", message)
    if 'NOT' in message:
        raise RuntimeError("No optimal solution found...")

    # get label matches; label pairs are made of labels, so pairs that
    # involve the dummy are recognized by the no-match *label*
    label_matches = [
        label_pair
        for label_pair, label_indicator in label_indicators.items()
        if no_match_label not in label_pair
        and solution[label_indicator] > 0.5
    ]

    # get node matches; edges are made of node ids, so dummy edges are
    # recognized by the no-match *node*
    node_matches = [
        e
        for e in edges_xy
        if solution[edge_indicators[e]] > 0.5 and no_match_node not in e
    ]

    # get error counts
    num_splits = solution[splits]
    num_merges = solution[merges]
    num_fps = 0
    num_fns = 0
    for label_pair, label_indicator in label_indicators.items():
        if label_pair[0] == no_match_label:
            num_fps += solution[label_indicator]
        if label_pair[1] == no_match_label:
            num_fns += solution[label_indicator]
    # The pinned (no-match, no-match) pair is counted in both sums
    num_fps -= 1
    num_fns -= 1
    # Pairs with the dummy label are FPs/FNs, not splits/merges
    num_splits -= num_fps
    num_merges -= num_fns

    return (
        label_matches, node_matches,
        num_splits, num_merges,
        num_fps, num_fns)