def anneal_clique_2best_match(l_graph, k, div, intx_cliq_freq, clq_numb_dist, debug=False): from collections import defaultdict from itertools import combinations from phoenix import divtools from heapq import heappush, heappop ## initialize q_min_div_4graph = [] div_dict = dict() cgraph = l_graph edge_list = [] if len(cgraph.nodes()) is 0: ## g is null return putils.graph_from_clique( k) ## from Null graph to graph of size k ## find the cliques in the current graph cliques = list(nx.find_cliques(cgraph)) print cliques c_lengths = [len(c) for c in cliques] csubset = [ c for c in c_lengths if tuple(sorted((c, k))) in intx_cliq_freq.keys() ] if not csubset: return l_graph print csubset for c in csubset: #print (c,k) pair = tuple(sorted((c, k))) intxn = helpers.get_weighted_random_value(intx_cliq_freq[pair]) m_nodes = max( cgraph.nodes()) + 1 + k - intxn ## using len | or max + 1 if not debug: print " ", m_nodes, ':max_nodes' clq_intx = set(pair).intersection(set(c_lengths)) if clq_intx: if debug: print ' ', clq_intx, ':clq_intx' else: continue c1 = cliques[c_lengths.index(clq_intx.pop())] if debug: print ' ', c1, ': G.clique' #clique to anneal new k-clique to if not debug: print ' clique-sum(', k, ').to(', c1, ').along-intxn:', intxn newnodes = range(m_nodes - k + intxn, m_nodes) base_clique_comb = set(combinations(c1, intxn)) for c_comb in base_clique_comb: clq_nodes = set(c1).difference(c_comb) #print ' ',c_comb,newnodes clq2add = [list(c_comb), newnodes] # clq2add.append(list(c_comb)) # clq2add.append(range(max(cgraph.nodes())+1,m_nodes)) #print ' ',list(c_comb), range(max(cgraph.nodes())+1,m_nodes) #print clq2add clq2add = reduce(lambda x, y: x + y, clq2add) if not debug: print ' ', c1, '+', clq2add #,":clique to add" edge_lst = set(combinations(clq2add, 2)) if not edge_lst: #print '. 
empty edge list' continue if debug: print ' ', edge_lst, ':edge list set' cgraph.add_edges_from(edge_lst) #print ' ', cgraph.number_of_nodes() ## get current distributions for cgraph clqs = list(nx.find_cliques(cgraph)) clq_dist = helpers.clique_number_distribution(clqs) clq2clq_intxn_dist = phoenix.clique2cliqueIntersection(clqs) # clq_dist = helpers.normalize_distribution(clq_dist) clq2clq_intxn_dist = helpers.normalize_distributions( clq2clq_intxn_dist) c_div = divtools.jsd2(clq_numb_dist, clq_dist) x_div = divtools.avg_jsd2(intx_cliq_freq, clq2clq_intxn_dist) avg_div = np.mean([c_div, x_div]) ## regular avg. ## add avg_div to dict div_dict[avg_div] = edge_lst if debug: print ' ', avg_div, ':avg div' # ...... ends for each combination if len(div_dict) == 0: return l_graph #print ' div:',div_dict[min(div_dict)] edge_list = div_dict[min(div_dict)] print ' ', min(div_dict) #, k, edge_list l_graph.add_edges_from(edge_list) #print l_graph.nodes() return l_graph
def anneal_bycliquesweep_toGraph(l_graph, k, div, intx_cliq_freq, clq_numb_dist, debug=False): from collections import defaultdict from itertools import combinations from phoenix import divtools from heapq import heappush, heappop from networkx.algorithms.approximation import clique ## initialize q_min_div_4graph = [] div_dict = dict() cgraph = l_graph if len(cgraph.nodes()) is 0: ## g is null return putils.graph_from_clique( k) ## from Null graph to graph of size k """ Below models when we sweep across all cliques in the forming graph """ ## find the cliques in the current graph cliques = list(nx.find_cliques(cgraph)) ## list of G.clique lengths c_lengths = [len(c) for c in cliques] if debug: print c_lengths, k, ':[G.c],k' for cl in c_lengths: pair = tuple((cl, k)) if not (pair in intx_cliq_freq.keys()): #pair = tuple((cl, k)) continue intxn = helpers.get_weighted_random_value(intx_cliq_freq[pair]) print ' :', cgraph.number_of_nodes(), pair, intxn if not intxn: print ' No intersection' continue elif debug: print " %d, %s, %s : (k,intxn,pair)" % (k, intxn, pair) #======= # #print kpairs # j = 0 # for pair in kpairs: # if not [c for c in c_lengths if set((c,k)) == set(pair)]: # continue # # intxn = helpers.get_weighted_random_value(intx_cliq_freq[pair]) # if not intxn: # print ' No intersection' # continue # elif not debug: print " ",intxn,':intxn', pair,':pair' m_nodes = len(cgraph.nodes()) + k - intxn ## using len | or max + 1 if debug: print " ", m_nodes, ':max_nodes' ## clq_intx = set(pair).intersection(set(c_lengths)) if clq_intx: if debug: print ' ', clq_intx, ':clq_intx' else: continue c1 = cliques[c_lengths.index(clq_intx.pop())] ## the clique we will anneal the new k-clique to if not debug: print ' ', c1, ': G.clique' # base_clique_comb = set(combinations(c1, intxn)) for c_comb in base_clique_comb: clq_nodes = set(c1).difference(c_comb) #print ' ',c_comb, clq_nodes.pop() # clq2add = [] clq2add.append(list(c_comb)) clq2add.append(range(max(cgraph.nodes()) + 
1, m_nodes)) #print ' ',list(c_comb), range(max(cgraph.nodes())+1,m_nodes) #print clq2add clq2add = reduce(lambda x, y: x + y, clq2add) if not debug: print ' ', clq2add, ":clique to add" edge_lst = set(combinations(clq2add, 2)) if not edge_lst: #print '. empty edge list' continue if not debug: print ' ', edge_lst, ':edge list set' cgraph.add_edges_from(edge_lst) ## get current distributions for cgraph clqs = list(nx.find_cliques(cgraph)) clq_dist = helpers.clique_number_distribution(clqs) clq2clq_intxn_dist = phoenix.clique2cliqueIntersection(clqs) # clq_dist = helpers.normalize_distribution(clq_dist) clq2clq_intxn_dist = helpers.normalize_distributions( clq2clq_intxn_dist) c_div = divtools.jsd2(clq_numb_dist, clq_dist) x_div = divtools.avg_jsd2(intx_cliq_freq, clq2clq_intxn_dist) avg_div = np.mean([c_div, x_div]) ## regular avg. ## add avg_div to dict div_dict[avg_div] = edge_lst if debug: print ' ', avg_div, ':avg div' # ...... ends for each combination if not len(div_dict): print 'no div dict' return cgraph print ' ', min(div_dict.keys()), ':min div for last comb set' # ends for cl in G if len(div_dict) == 0: return cgraph print ' ', min(div_dict) #print ' div:',div_dict[min(div_dict)] edge_list = div_dict[min(div_dict)] if debug: print edge_list l_graph.add_edges_from(edge_list) #print l_graph.nodes() return l_graph
## graph.cliques g = load_graph(input_graph) g_cliques = list(nx.find_cliques(g)) ## graph.compress print '-' * 80 print('Starting compression ...') ## clique distribution clq_dist = helpers.clique_number_distribution(g_cliques) ## clique to clique intersection distribution clq2clq_intxn_dist = phoenix.clique2cliqueIntersection(g_cliques) clq_dist = helpers.normalize_distribution(clq_dist) clq2clq_intxn_dist = helpers.normalize_distributions(clq2clq_intxn_dist) if debug: print " ", clq_dist.keys() if debug: print " ", clq2clq_intxn_dist.keys() if debug: print ' Graph compressed. Distributions normalized.' print '.' * 80, '\nGraph Formation...' leGraph = graph_formation(clq_dist, clq2clq_intxn_dist, .5, 36, True) print ' ', nx.diameter(g), nx.diameter(leGraph), leGraph.number_of_nodes() ## # g_clqs = list(nx.find_cliques(leGraph)) # c_dist = helpers.clique_number_distribution(g_clqs) # c_dist = helpers.normalize_distribution(clq_dist) # if debug: print ' ', divtools.jsd2(clq_dist, c_dist) #pp(clq_dist)
def fuse_cliques_pair(clq_pr, kl_threshold, cliq_numb_dist, poss_combs): from heapq import heappush, heappop print("-- fusing the nodes ...") # poss_combs has the size of the intersection embedded # clq_pr tells us the the left and right side cliques. print "-- left and right sides", clq_pr jsdh = [] #jsd_dict_heap = dict() ## test combinations for minimal divergence from reference for intrsct_nodes in poss_combs: #print " ", intrsct_nodes edg_lst = [] lft_side = combine_sets(intrsct_nodes, clq_pr[0]) lft_side = list(combinations(lft_side, 2)) rgt_side = combine_sets(intrsct_nodes, clq_pr[1]) rgt_side = list(combinations(rgt_side, 2)) #print lft_side, rgt_side g = nx.Graph() g.add_edges_from(lft_side) g.add_edges_from(rgt_side) ## now that we created this graph, we get compress it ## get its distribution models ## and compare them to one of the input distributions cliques = list(nx.find_cliques(g)) loc_cliques_dist = helpers.clique_number_distribution(cliques) loc_cliques_dist = helpers.normalize_distribution(loc_cliques_dist) loc_clq_len = len(loc_cliques_dist.values()) loc_cliques_dist_v = loc_cliques_dist.values() # pad the array of values with np.nan [ loc_cliques_dist_v.append(np.nan * x) for x in range(0, len(cliq_numb_dist.values()) - loc_clq_len) ] heappush( jsdh, (jsd(cliq_numb_dist.values(), loc_cliques_dist_v), intrsct_nodes)) min_div = heappop(jsdh) if debug: print min_div[1] ## nodes tuple #print list(min_div[1]) ## intersection comb that yields the smallest divergence is obtained via ## heappop( jsdh ) ## Given this, we freeze the graph @ this intersection edg_lst = [] lft_side = combine_sets(min_div[1], clq_pr[0]) #lft_side = list(combinations(lft_side,2)) rgt_side = combine_sets(min_div[1], clq_pr[1]) if debug: print lft_side, rgt_side g = nx.Graph() if (len(lft_side) == 2): g.add_edges_from([lft_side]) else: g.add_edges_from(lft_side) if (len(rgt_side) == 2): g.add_edges_from([rgt_side]) else: g.add_edges_from(rgt_side) return g
## ---- Compression
pp('Starting compression ...')

## phoenix.compress yields the 1:2 model and the 2:1 intersection model;
## each is normalized in turn
one_to_two_model, two_to_one_intxn_model = map(
    helpers.normalize_distributions, phoenix.compress(cliques))
#pp(one_to_two_model)
#print(two_to_one_intxn_model)

## raw distributions first, normalization second
clq_numb_dist = helpers.clique_number_distribution(cliques)
clqs_x_clqs_dist = helpers.cliques_x_cliques_distribution(cliques)
#pp(clqs_x_clqs_dist)
clq_numb_dist = helpers.normalize_distribution(
    clq_numb_dist)  # the distribution of clique lengths
clqs_x_clqs_dist = helpers.normalize_distributions(clqs_x_clqs_dist)

## disabled summary output, kept for reference:
# print 'Compression Summary: \n Number of cliques: %d' % len(cliques)
# print ' Compression rules: 1:2 model and 2:1 intersection model: %d, %d' % (len(one_to_two_model), len(two_to_one_intxn_model))
# print ' Clique sizes: ', (len(clq_numb_dist))
# print ' Cliques intersections: ', len(clqs_x_clqs_dist)

## ---- Decompression: start from a seed graph holding the single edge 0-1
generated_graph = nx.Graph([(0, 1)])
# seed_g = list(nx.find_cliques(generated_graph))
# #seeds
# # generated_graph
# # clq_numb_dist
# # clqs_x_clqs_dist