def merge_and_split_partition(self, i):
    """Propose a merge-then-split move for partition ``i``.

    The move is built in two phases:

    1. Score ``Merge(i, j)`` with ``self.score_change`` for every candidate
       partition ``j != i`` and keep the one selected by ``utils.get_max``.
    2. Pool the nodes of ``i`` and ``j`` into ``g1`` and greedily move one
       node at a time into an initially empty ``g2`` until ``g2`` holds
       ``self.k`` nodes.  Each step moves the node with the highest summed
       ``__likelihood_partition_pair`` value (or, for the very first node,
       a random one when ``random_splitmerge`` is set).

    NOTE(review): ``two_hop_only`` and ``random_splitmerge`` are not defined
    in this function; presumably they are module-level configuration flags.

    Args:
        i: identifier of the partition to merge and re-split.

    Returns:
        ``[]`` when there is no partition to merge with, or when the split
        merely reproduces the original ``i``/``j`` grouping; otherwise the
        one-element list ``[MergeSplit(i, j, g1, g2)]``.
    """
    # first find best merge
    if two_hop_only:
        candidate_partitions = self.state.neighbors_two_hop(i)
    else:
        candidate_partitions = self.state.get_partitions()

    successors = [Merge(i, j) for j in candidate_partitions if i != j]
    if not successors:
        # Covers both "no candidates at all" and "the only candidate is i
        # itself"; the latter used to reach utils.get_max with an empty list.
        return []

    scores = []
    for merge in successors:
        score = self.score_change(merge)
        assert score is not None and str(score) != 'nan'
        scores.append(score)

    merge_op = successors[utils.get_max(scores)]
    j = merge_op.j

    # then find best split of a merged i,j
    g1 = list(self.state.get_nodes(i)) + list(self.state.get_nodes(j))
    g2 = []

    # Running edge counts: inside g1, between g1 and g2, inside g2.
    edges_11 = (self.state.edge_count(i) + self.state.edge_count(j)
                + self.state.edge_count(i, j))
    edges_12 = 0
    edges_22 = 0

    # Running edge counts from g1 / g2 to every neighbouring partition k.
    nbrs = self.state.neighbors([i, j])
    g1_edges = {}
    g2_edges = {}
    for k in nbrs:
        g1_edges[k] = self.state.edge_count(i, k) + self.state.edge_count(j, k)
        g2_edges[k] = 0

    while len(g2) < self.k:
        # find best node from g1 to move to g2
        best_ll = None
        best_node = None
        if random_splitmerge and len(g2) == 0:
            best_node = random.choice(g1)
        else:
            # Group sizes after the move; identical for every candidate node.
            size_1 = len(g1) - 1
            size_2 = len(g2) + 1
            for node in g1:
                ll = 0
                # compute change in score between g1 and g2 if we move node to g2
                edges_node_1 = self.state.edge_count([node], g1)
                edges_node_2 = self.state.edge_count([node], g2)
                tmp_edges_11 = edges_11 - edges_node_1
                tmp_edges_12 = edges_12 - edges_node_2 + edges_node_1
                tmp_edges_22 = edges_22 + edges_node_2
                ll += self.__likelihood_partition_pair(tmp_edges_12, size_1, size_2)
                ll += self.__likelihood_partition_pair(tmp_edges_11, size_1)
                ll += self.__likelihood_partition_pair(tmp_edges_22, size_2)
                # compute change in edge counts and score if we move node to g2
                for k in nbrs:
                    nodes_k = self.state.node_count(k)
                    # compute change in edges
                    edges_node_k = self.state.edge_count([node], self.state.get_nodes(k))
                    edges_1k = g1_edges[k] - edges_node_k
                    edges_2k = g2_edges[k] + edges_node_k
                    ll += self.__likelihood_partition_pair(edges_1k, size_1, nodes_k)
                    ll += self.__likelihood_partition_pair(edges_2k, size_2, nodes_k)
                # Explicit None check: the first candidate always wins, without
                # relying on Python 2's ordering of numbers against None.
                if best_ll is None or ll > best_ll:
                    best_ll = ll
                    best_node = node
        assert best_node is not None

        # Commit the move: update the running edge counts, then the groups.
        edges_node_1 = self.state.edge_count([best_node], g1)
        edges_node_2 = self.state.edge_count([best_node], g2)
        edges_11 = edges_11 - edges_node_1
        edges_12 = edges_12 - edges_node_2 + edges_node_1
        edges_22 = edges_22 + edges_node_2
        assert edges_12 >= 0 and edges_11 >= 0 and edges_22 >= 0
        for k in nbrs:
            edges_node_k = self.state.edge_count([best_node], self.state.get_nodes(k))
            g1_edges[k] = g1_edges[k] - edges_node_k
            g2_edges[k] = g2_edges[k] + edges_node_k
            assert g1_edges[k] >= 0 and g2_edges[k] >= 0
        g1.remove(best_node)
        g2.append(best_node)

    # A split that reproduces the original two partitions is a no-op move.
    s_g1 = set(g1)
    s_g2 = set(g2)
    s_i = set(self.state.get_nodes(i))
    s_j = set(self.state.get_nodes(j))
    if s_g1 == s_i:
        assert s_g2 == s_j
        return []
    if s_g1 == s_j:
        assert s_g2 == s_i
        return []
    return [MergeSplit(i, j, g1, g2)]
def merge_and_split_partition(self, i):
    """Propose a merge-then-split move for partition ``i``.

    The move is built in two phases:

    1. Score ``Merge(i, j)`` with ``self.score_change`` for every candidate
       partition ``j != i`` and keep the one selected by ``utils.get_max``.
    2. Pool the nodes of ``i`` and ``j`` into ``g1`` and greedily move one
       node at a time into an initially empty ``g2`` until ``g2`` holds
       ``self.k`` nodes.  Each step moves the node with the highest summed
       ``__likelihood_partition_pair`` value (or, for the very first node,
       a random one when ``random_splitmerge`` is set).

    NOTE(review): ``two_hop_only`` and ``random_splitmerge`` are not defined
    in this function; presumably they are module-level configuration flags.

    Args:
        i: identifier of the partition to merge and re-split.

    Returns:
        ``[]`` when there is no partition to merge with, or when the split
        merely reproduces the original ``i``/``j`` grouping; otherwise the
        one-element list ``[MergeSplit(i, j, g1, g2)]``.
    """
    # first find best merge
    if two_hop_only:
        candidate_partitions = self.state.neighbors_two_hop(i)
    else:
        candidate_partitions = self.state.get_partitions()

    successors = [Merge(i, j) for j in candidate_partitions if i != j]
    if not successors:
        # Covers both "no candidates at all" and "the only candidate is i
        # itself"; the latter used to reach utils.get_max with an empty list.
        return []

    scores = []
    for merge in successors:
        score = self.score_change(merge)
        assert score is not None and str(score) != 'nan'
        scores.append(score)

    merge_op = successors[utils.get_max(scores)]
    j = merge_op.j

    # then find best split of a merged i,j
    g1 = list(self.state.get_nodes(i)) + list(self.state.get_nodes(j))
    g2 = []

    # Running edge counts: inside g1, between g1 and g2, inside g2.
    edges_11 = (self.state.edge_count(i) + self.state.edge_count(j)
                + self.state.edge_count(i, j))
    edges_12 = 0
    edges_22 = 0

    # Running edge counts from g1 / g2 to every neighbouring partition k.
    nbrs = self.state.neighbors([i, j])
    g1_edges = {}
    g2_edges = {}
    for k in nbrs:
        g1_edges[k] = self.state.edge_count(i, k) + self.state.edge_count(j, k)
        g2_edges[k] = 0

    while len(g2) < self.k:
        # find best node from g1 to move to g2
        best_ll = None
        best_node = None
        if random_splitmerge and len(g2) == 0:
            best_node = random.choice(g1)
        else:
            # Group sizes after the move; identical for every candidate node.
            size_1 = len(g1) - 1
            size_2 = len(g2) + 1
            for node in g1:
                ll = 0
                # compute change in score between g1 and g2 if we move node to g2
                edges_node_1 = self.state.edge_count([node], g1)
                edges_node_2 = self.state.edge_count([node], g2)
                tmp_edges_11 = edges_11 - edges_node_1
                tmp_edges_12 = edges_12 - edges_node_2 + edges_node_1
                tmp_edges_22 = edges_22 + edges_node_2
                ll += self.__likelihood_partition_pair(tmp_edges_12, size_1, size_2)
                ll += self.__likelihood_partition_pair(tmp_edges_11, size_1)
                ll += self.__likelihood_partition_pair(tmp_edges_22, size_2)
                # compute change in edge counts and score if we move node to g2
                for k in nbrs:
                    nodes_k = self.state.node_count(k)
                    # compute change in edges
                    edges_node_k = self.state.edge_count([node], self.state.get_nodes(k))
                    edges_1k = g1_edges[k] - edges_node_k
                    edges_2k = g2_edges[k] + edges_node_k
                    ll += self.__likelihood_partition_pair(edges_1k, size_1, nodes_k)
                    ll += self.__likelihood_partition_pair(edges_2k, size_2, nodes_k)
                # Explicit None check: the first candidate always wins, without
                # relying on Python 2's ordering of numbers against None.
                if best_ll is None or ll > best_ll:
                    best_ll = ll
                    best_node = node
        assert best_node is not None

        # Commit the move: update the running edge counts, then the groups.
        edges_node_1 = self.state.edge_count([best_node], g1)
        edges_node_2 = self.state.edge_count([best_node], g2)
        edges_11 = edges_11 - edges_node_1
        edges_12 = edges_12 - edges_node_2 + edges_node_1
        edges_22 = edges_22 + edges_node_2
        assert edges_12 >= 0 and edges_11 >= 0 and edges_22 >= 0
        for k in nbrs:
            edges_node_k = self.state.edge_count([best_node], self.state.get_nodes(k))
            g1_edges[k] = g1_edges[k] - edges_node_k
            g2_edges[k] = g2_edges[k] + edges_node_k
            assert g1_edges[k] >= 0 and g2_edges[k] >= 0
        g1.remove(best_node)
        g2.append(best_node)

    # A split that reproduces the original two partitions is a no-op move.
    s_g1 = set(g1)
    s_g2 = set(g2)
    s_i = set(self.state.get_nodes(i))
    s_j = set(self.state.get_nodes(j))
    if s_g1 == s_i:
        assert s_g2 == s_j
        return []
    if s_g1 == s_j:
        assert s_g2 == s_i
        return []
    return [MergeSplit(i, j, g1, g2)]
def _save_state_partition(state, filename):
    """Save the node groups of ``state`` to ``filename`` via ``partition.Partition``."""
    groups = [state.get_nodes(gid) for gid in state.get_partitions()]
    partition.Partition(groups=groups).save(filename)


def annealing(problem, pickle_filename=None, working_dir=None):
    """Run simulated annealing on ``problem`` and return the best state seen.

    Each iteration scores every change returned by ``problem.successors()``,
    picks one (sampled with ``utils.sample`` when ``sample_successor`` is set,
    otherwise the one chosen by ``utils.get_max``) and applies it if it
    improves the score or passes the ``exp(score_change / T)`` acceptance
    test; otherwise a no-op change is applied.

    Every 10 steps a statistics line is printed; every 100 steps the current
    state and, if it improved, the best state are saved under ``working_dir``.

    NOTE(review): ``sample_successor`` is not defined in this function;
    presumably it is a module-level configuration flag.

    Args:
        problem: search problem exposing the methods used below
            (``get_score``, ``copy_state``, ``successors``, ``score_change``,
            ``apply_change``, ``apply_noop_change``, ``temperature``, ...).
        pickle_filename: unused; kept for interface compatibility.
        working_dir: directory prefix for checkpoint files.  When ``None``
            (the default) no checkpoints are written.

    Returns:
        The copy of the highest-scoring state observed, returned as soon as
        the score is within 1e-7 of zero or the temperature reaches 0.
    """
    logging_cycle = 10
    checkpoint_cycle = 100
    t = 0
    max_score = problem.get_score()
    argmax_score = problem.copy_state()
    max_saved_score = None
    uphill = 0
    downhill = 0
    stationery = 0

    while True:
        problem.sanity_check()
        score = problem.get_score()
        assert score <= 0, "Positive score! Expected score is log-likelihood which is <= 0"

        if score > max_score:
            argmax_score = problem.copy_state()
            max_score = score

        if 0 - score < 0.0000001:
            print("Found state with score=%g, exiting" % (round(score, 3)))
            return argmax_score

        # Checkpointing needs a target directory; without one the original
        # code failed on ``None + '/...'`` at the very first step.
        if t % checkpoint_cycle == 0 and working_dir is not None:
            state = problem.copy_state()
            filename_state = working_dir + '/%s.%d.i.%d.partition' % (state.g.name, problem.k, t)
            print("checkpoint: saving state to filename=%s" % filename_state)
            _save_state_partition(state, filename_state)

            # Explicit None check instead of relying on Python 2 ordering
            # of numbers against None.
            if max_saved_score is None or max_score > max_saved_score:
                state = argmax_score
                filename_best = working_dir + '/%s.%d.b.partition' % (state.g.name, problem.k)
                print("checkpoint: saving best (with score=%g) to filename=%s" % (max_score, filename_best))
                _save_state_partition(state, filename_best)
                max_saved_score = max_score

        if t % logging_cycle == 0:
            current = problem.get_current_state()
            avg_size = 1. * sum(map(len, current.partition.values())) \
                / current.num_partitions()
            (noops, splits, merges, mergesplits, flips, cache_hit_rate) = problem.get_stats()
            s = "noops=%d splits=%d merges=%d mergesplits=%d flips=%d cache=%g" % (
                noops, splits, merges, mergesplits, flips, cache_hit_rate)
            if merges > 0:
                s2 = "nbr_merge=%g nbr_nbr_merge=%g nonnbr_merge=%g" % (
                    1. * problem.nbr_merge / merges,
                    1. * problem.nbr_nbr_merge / merges,
                    1. * problem.nonnbr_merge / merges)
                # Reset the per-kind merge counters once they have been reported.
                problem.nbr_merge = problem.nbr_nbr_merge = problem.nonnbr_merge = 0
            else:
                s2 = "nbr_merge=%g nbr_nbr_merge=%g nonnbr_merge=%g" % (0, 0, 0)
            assert t == 0 or (uphill + downhill + stationery) == logging_cycle, "Some moves unnaccounted!"
            print("steps=%d n=%d k=%d avg_size=%g uphill=%g downhill=%g stationery=%g %s %s ll=%g" % (
                t, problem.n, problem.k, round(avg_size, 2),
                1. * uphill / logging_cycle,
                1. * downhill / logging_cycle,
                1. * stationery / logging_cycle,
                s, s2, score))
            uphill = downhill = stationery = 0
            sys.stdout.flush()

        T = problem.temperature(t)
        if T == 0:
            return argmax_score

        state_changes = problem.successors()
        scores = []
        for state_change in state_changes:
            change_score = problem.score_change(state_change)
            assert str(change_score) != 'nan'
            scores.append(change_score)

        if len(scores) > 0:
            if sample_successor:
                probs = utils.normalize(scores, 1. / T)
                idx = utils.sample(probs)
            else:
                idx = utils.get_max(scores)
            score_change = scores[idx]

            if score_change > 0:
                uphill += 1
                problem.apply_change(state_changes[idx])
            elif random.random() < math.exp(score_change / T):
                # Non-improving move accepted with probability exp(delta / T).
                downhill += 1
                problem.apply_change(state_changes[idx])
            else:
                stationery += 1
                problem.apply_noop_change()
        else:
            stationery += 1
            problem.apply_noop_change()

        t += 1