Example #1
0
	def merge_and_split_partition(self, i):
		"""Propose a merge-then-split move for partition ``i``.

		First the best merge partner ``j`` for ``i`` is picked according to
		``self.score_change``. Candidates come from
		``self.state.neighbors_two_hop(i)`` when the global flag
		``two_hop_only`` is set, otherwise from ``self.state.get_partitions()``.

		The pooled nodes of ``i`` and ``j`` are then re-split greedily: nodes
		are moved one at a time from the pooled group ``g1`` into a new group
		``g2`` until ``g2`` holds ``self.k`` nodes. Each step moves the node
		with the largest summed ``__likelihood_partition_pair`` value; when the
		global flag ``random_splitmerge`` is set, the first node is instead
		drawn with ``random.choice``.

		Returns:
			``[MergeSplit(i, j, g1, g2)]``, or ``[]`` when there is no merge
			partner or when the split just reproduces partitions ``i`` and
			``j``.
		"""

		# first find best merge
		if two_hop_only:
			candidate_partitions = self.state.neighbors_two_hop(i)
		else:
			candidate_partitions = self.state.get_partitions()

		successors = [Merge(i, j) for j in candidate_partitions if i != j]

		# Covers both "no candidates" and "the only candidate is i itself";
		# the latter previously fell through with an empty score list.
		if not successors:
			return []

		scores = []
		for merge in successors:
			score = self.score_change(merge)
			assert score is not None and str(score) != 'nan'
			scores.append(score)
		# utils.get_max is used as an arg-max here: its result indexes successors
		j = successors[utils.get_max(scores)].j

		# then find best split of a merged i,j
		g1 = self.state.get_nodes(i)[:] + self.state.get_nodes(j)[:]
		g2 = []

		# edge counts inside g1, between g1 and g2, and inside g2
		edges_11 = self.state.edge_count(i) + self.state.edge_count(j) + self.state.edge_count(i, j)
		edges_12 = 0
		edges_22 = 0

		# edge counts from g1 / g2 to each neighboring partition k
		g1_edges = {}
		g2_edges = {}
		nbrs = self.state.neighbors([i, j])
		for k in nbrs:
			g1_edges[k] = self.state.edge_count(i, k) + self.state.edge_count(j, k)
			g2_edges[k] = 0

		while len(g2) < self.k:

			# find best node from g1 to move to g2
			best_ll = None
			best_node = None

			if random_splitmerge and len(g2) == 0:
				best_node = random.choice(g1)
			else:
				for node in g1:
					ll = 0

					# compute change in score between g1 and g2 if we move node to g2
					edges_node_1 = self.state.edge_count([node], g1)
					edges_node_2 = self.state.edge_count([node], g2)
					tmp_edges_11 = edges_11 - edges_node_1
					tmp_edges_12 = edges_12 - edges_node_2 + edges_node_1
					tmp_edges_22 = edges_22 + edges_node_2

					ll += self.__likelihood_partition_pair(tmp_edges_12, len(g1) - 1, len(g2) + 1)
					ll += self.__likelihood_partition_pair(tmp_edges_11, len(g1) - 1)
					ll += self.__likelihood_partition_pair(tmp_edges_22, len(g2) + 1)

					# compute change in edge counts and score if we move node to g2
					for k in nbrs:
						nodes_k = self.state.node_count(k)

						# compute change in edges
						edges_node_k = self.state.edge_count([node], self.state.get_nodes(k))
						edges_1k = g1_edges[k] - edges_node_k
						edges_2k = g2_edges[k] + edges_node_k

						ll += self.__likelihood_partition_pair(edges_1k, len(g1) - 1, nodes_k)
						ll += self.__likelihood_partition_pair(edges_2k, len(g2) + 1, nodes_k)

					# Explicit None test: ordering a number against None only
					# worked through Python 2's arbitrary cross-type ordering
					# and raises TypeError on Python 3.
					if best_ll is None or ll > best_ll:
						best_ll = ll
						best_node = node

			assert best_node is not None

			# commit the move: update the running edge counts for the chosen node
			edges_node_1 = self.state.edge_count([best_node], g1)
			edges_node_2 = self.state.edge_count([best_node], g2)
			edges_11 = edges_11 - edges_node_1
			edges_12 = edges_12 - edges_node_2 + edges_node_1
			edges_22 = edges_22 + edges_node_2
			assert edges_12 >= 0 and edges_11 >= 0 and edges_22 >= 0

			for k in nbrs:
				edges_node_k = self.state.edge_count([best_node], self.state.get_nodes(k))
				g1_edges[k] = g1_edges[k] - edges_node_k
				g2_edges[k] = g2_edges[k] + edges_node_k
				assert g1_edges[k] >= 0 and g2_edges[k] >= 0
			g1.remove(best_node)
			g2.append(best_node)

		s_g1 = set(g1)
		s_g2 = set(g2)
		s_i = set(self.state.get_nodes(i))
		s_j = set(self.state.get_nodes(j))

		# A split that recreates the two original partitions is not a move.
		if s_g1 == s_i:
			assert s_g2 == s_j
			return []
		elif s_g1 == s_j:
			assert s_g2 == s_i
			return []
		else:
			return [MergeSplit(i, j, g1, g2)]
Example #2
0
    def merge_and_split_partition(self, i):
        """Propose a merge-then-split move for partition ``i``.

        First the best merge partner ``j`` for ``i`` is picked according to
        ``self.score_change``. Candidates come from
        ``self.state.neighbors_two_hop(i)`` when the global flag
        ``two_hop_only`` is set, otherwise from ``self.state.get_partitions()``.

        The pooled nodes of ``i`` and ``j`` are then re-split greedily: nodes
        are moved one at a time from the pooled group ``g1`` into a new group
        ``g2`` until ``g2`` holds ``self.k`` nodes. Each step moves the node
        with the largest summed ``__likelihood_partition_pair`` value; when the
        global flag ``random_splitmerge`` is set, the first node is instead
        drawn with ``random.choice``.

        Returns:
            ``[MergeSplit(i, j, g1, g2)]``, or ``[]`` when there is no merge
            partner or when the split just reproduces partitions ``i`` and
            ``j``.
        """

        # first find best merge
        if two_hop_only:
            candidate_partitions = self.state.neighbors_two_hop(i)
        else:
            candidate_partitions = self.state.get_partitions()

        successors = [Merge(i, j) for j in candidate_partitions if i != j]

        # Covers both "no candidates" and "the only candidate is i itself";
        # the latter previously fell through with an empty score list.
        if not successors:
            return []

        scores = []
        for merge in successors:
            score = self.score_change(merge)
            assert score is not None and str(score) != 'nan'
            scores.append(score)
        # utils.get_max is used as an arg-max here: its result indexes successors
        j = successors[utils.get_max(scores)].j

        # then find best split of a merged i,j
        g1 = self.state.get_nodes(i)[:] + self.state.get_nodes(j)[:]
        g2 = []

        # edge counts inside g1, between g1 and g2, and inside g2
        edges_11 = (self.state.edge_count(i) + self.state.edge_count(j) +
                    self.state.edge_count(i, j))
        edges_12 = 0
        edges_22 = 0

        # edge counts from g1 / g2 to each neighboring partition k
        g1_edges = {}
        g2_edges = {}
        nbrs = self.state.neighbors([i, j])
        for k in nbrs:
            g1_edges[k] = (self.state.edge_count(i, k) +
                           self.state.edge_count(j, k))
            g2_edges[k] = 0

        while len(g2) < self.k:

            # find best node from g1 to move to g2
            best_ll = None
            best_node = None

            if random_splitmerge and len(g2) == 0:
                best_node = random.choice(g1)
            else:
                for node in g1:
                    ll = 0

                    # compute change in score between g1 and g2 if we move node to g2
                    edges_node_1 = self.state.edge_count([node], g1)
                    edges_node_2 = self.state.edge_count([node], g2)
                    tmp_edges_11 = edges_11 - edges_node_1
                    tmp_edges_12 = edges_12 - edges_node_2 + edges_node_1
                    tmp_edges_22 = edges_22 + edges_node_2

                    ll += self.__likelihood_partition_pair(
                        tmp_edges_12, len(g1) - 1, len(g2) + 1)
                    ll += self.__likelihood_partition_pair(
                        tmp_edges_11, len(g1) - 1)
                    ll += self.__likelihood_partition_pair(
                        tmp_edges_22, len(g2) + 1)

                    # compute change in edge counts and score if we move node to g2
                    for k in nbrs:
                        nodes_k = self.state.node_count(k)

                        # compute change in edges
                        edges_node_k = self.state.edge_count(
                            [node], self.state.get_nodes(k))
                        edges_1k = g1_edges[k] - edges_node_k
                        edges_2k = g2_edges[k] + edges_node_k

                        ll += self.__likelihood_partition_pair(
                            edges_1k, len(g1) - 1, nodes_k)
                        ll += self.__likelihood_partition_pair(
                            edges_2k, len(g2) + 1, nodes_k)

                    # Explicit None test: ordering a number against None only
                    # worked through Python 2's arbitrary cross-type ordering
                    # and raises TypeError on Python 3.
                    if best_ll is None or ll > best_ll:
                        best_ll = ll
                        best_node = node

            assert best_node is not None

            # commit the move: update the running edge counts for the chosen node
            edges_node_1 = self.state.edge_count([best_node], g1)
            edges_node_2 = self.state.edge_count([best_node], g2)
            edges_11 = edges_11 - edges_node_1
            edges_12 = edges_12 - edges_node_2 + edges_node_1
            edges_22 = edges_22 + edges_node_2
            assert edges_12 >= 0 and edges_11 >= 0 and edges_22 >= 0

            for k in nbrs:
                edges_node_k = self.state.edge_count([best_node],
                                                     self.state.get_nodes(k))
                g1_edges[k] = g1_edges[k] - edges_node_k
                g2_edges[k] = g2_edges[k] + edges_node_k
                assert g1_edges[k] >= 0 and g2_edges[k] >= 0
            g1.remove(best_node)
            g2.append(best_node)

        s_g1 = set(g1)
        s_g2 = set(g2)
        s_i = set(self.state.get_nodes(i))
        s_j = set(self.state.get_nodes(j))

        # A split that recreates the two original partitions is not a move.
        if s_g1 == s_i:
            assert s_g2 == s_j
            return []
        elif s_g1 == s_j:
            assert s_g2 == s_i
            return []
        else:
            return [MergeSplit(i, j, g1, g2)]
Example #3
0
def _save_state_partition(state, filename):
	"""Save the node groups of ``state`` to ``filename`` via ``partition.Partition``."""
	groups = [state.get_nodes(gid) for gid in state.get_partitions()]
	partition.Partition(groups=groups).save(filename)


def annealing(problem, pickle_filename=None, working_dir=None):
	"""Run simulated annealing on ``problem`` and return the best state seen.

	Each step asks ``problem`` for its successor moves and their score
	changes, picks one (sampled with ``utils.normalize`` / ``utils.sample``
	when the global flag ``sample_successor`` is set, otherwise the
	``utils.get_max`` choice) and applies it if it improves the score or
	passes the ``exp(score_change / T)`` acceptance test; otherwise a no-op
	is applied.

	Every 100 steps the current state is written to ``working_dir``, along
	with the best state if it has improved since it was last saved; every
	10 steps a progress line is printed.

	Args:
		problem: search problem object; must provide ``get_score``,
			``copy_state``, ``sanity_check``, ``temperature``,
			``successors``, ``score_change``, ``apply_change``,
			``apply_noop_change``, ``get_stats``, ``get_current_state`` and
			the attributes ``n``, ``k``, ``nbr_merge``, ``nbr_nbr_merge``
			and ``nonnbr_merge``.
		pickle_filename: unused; kept for interface compatibility.
		working_dir: directory prefix for checkpoint files. ``None`` means
			the current directory (previously ``None`` crashed at the
			first checkpoint).

	Returns:
		The copy of the highest-scoring state observed. Returned when
		``problem.temperature(t)`` is 0 or a state scoring within 1e-7 of
		zero is found.
	"""
	logging_cycle = 10
	checkpoint_cycle = 100

	# The first checkpoint happens at t == 0, so a None prefix would always
	# fail on string concatenation.
	if working_dir is None:
		working_dir = '.'

	t = 0
	max_score = problem.get_score()
	argmax_score = problem.copy_state()
	max_saved_score = None

	uphill = 0
	downhill = 0
	stationery = 0

	while True:

		problem.sanity_check()

		score = problem.get_score()
		assert score <= 0, "Positive score! Expected score is log-likelihood which is <= 0"
		if score > max_score:
			argmax_score = problem.copy_state()
			max_score = score
			if 0-score < 0.0000001:
				print("Found state with score=%g, exiting" % (round(score, 3)))
				return argmax_score

		if t % checkpoint_cycle == 0:
			state = problem.copy_state()
			filename_state = working_dir + '/%s.%d.i.%d.partition' % (state.g.name, problem.k, t)
			print("checkpoint: saving state to filename=%s" % filename_state)
			_save_state_partition(state, filename_state)

			# Explicit None test: comparing a number with None raises
			# TypeError on Python 3.
			if max_saved_score is None or max_score > max_saved_score:
				state = argmax_score
				filename_best = working_dir + '/%s.%d.b.partition' % (state.g.name, problem.k)
				print("checkpoint: saving best (with score=%g) to filename=%s" % (max_score, filename_best))
				_save_state_partition(state, filename_best)
				max_saved_score = max_score

		if t % logging_cycle == 0:

			current = problem.get_current_state()
			avg_size = 1. * sum(map(len, current.partition.values())) / current.num_partitions()
			(noops, splits, merges, mergesplits, flips, cache_hit_rate) = problem.get_stats()
			s = "noops=%d splits=%d merges=%d mergesplits=%d flips=%d cache=%g" % (noops, splits, merges, mergesplits, flips, cache_hit_rate)

			if merges > 0:
				nbr_merge = 1. * problem.nbr_merge / merges
				nbr_nbr_merge = 1. * problem.nbr_nbr_merge / merges
				nonnbr_merge = 1. * problem.nonnbr_merge / merges
				s2 = "nbr_merge=%g nbr_nbr_merge=%g nonnbr_merge=%g" % (nbr_merge, nbr_nbr_merge, nonnbr_merge)
				# the merge-type counters are reset after each report
				problem.nbr_merge = problem.nbr_nbr_merge = problem.nonnbr_merge = 0
			else:
				s2 = "nbr_merge=%g nbr_nbr_merge=%g nonnbr_merge=%g" % (0, 0, 0)

			assert t == 0 or (uphill + downhill + stationery) == logging_cycle, "Some moves unnaccounted!"
			print("steps=%d n=%d k=%d avg_size=%g uphill=%g downhill=%g stationery=%g %s %s ll=%g" % (
				t, problem.n, problem.k, round(avg_size, 2),
				1. * uphill / logging_cycle,
				1. * downhill / logging_cycle,
				1. * stationery / logging_cycle,
				s, s2, score))
			uphill = downhill = stationery = 0

			sys.stdout.flush()

		T = problem.temperature(t)
		if T == 0:
			return argmax_score

		state_changes = problem.successors()
		scores = []
		for state_change in state_changes:
			# named delta so the current log-likelihood in `score` is not clobbered
			delta = problem.score_change(state_change)
			assert str(delta) != 'nan'
			scores.append(delta)

		if len(scores) > 0:
			if sample_successor:
				probs = utils.normalize(scores, 1./T)
				idx = utils.sample(probs)
			else:
				idx = utils.get_max(scores)
			score_change = scores[idx]

			if score_change > 0:
				uphill += 1
				problem.apply_change(state_changes[idx])
			elif random.random() < math.exp(score_change/T):
				# a zero change also lands here: exp(0) == 1 exceeds any
				# value random.random() can return
				downhill += 1
				problem.apply_change(state_changes[idx])
			else:
				stationery += 1
				problem.apply_noop_change()
		else:
			stationery += 1
			problem.apply_noop_change()
		t += 1