def Cliques(G) : """ Return a list of cliques the player belongs to """ nx.find_cliques(G) cliques = [] for i in G.nodes() : cliques.append(nx.cliques_containing_node(G, i)) return cliques
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--graph", type=str, help="graph file")
    arg = ap.parse_args()
    result = get_graph(arg.graph)
    cliques = [clique for clique in nx.find_cliques(result) if len(clique) > 2]
    print cliques
def test_selfloops(self):
    self.G.add_edge(1, 1)
    cl = list(nx.find_cliques(self.G))
    rcl = nx.find_cliques_recursive(self.G)
    assert_equal(sorted(map(sorted, cl)), sorted(map(sorted, rcl)))
    assert_equal(cl, [[2, 6, 1, 3], [2, 6, 4], [5, 4, 7], [8, 9], [10, 11]])
def all_cliques(graph, count):
    """
    Now, given a large graph, sample the cliques and test for homogeneity

    Parameters
    ----------
    graph : a networkx graph

    Method
    ------
    * creates a mapping from nodes to communities
    * uses networkx to generate several cliques and maps the clique members
      to communities, if the clique has at least 4 members
    """
    pf = open('cliques_within_the_graph.pkl', 'wb')
    part = CD.modularity_run(graph)
    cgen = nx.find_cliques(graph)
    found = []
    for i in xrange(count):
        try:
            clump = cgen.next()
            if len(clump) > 2:
                found.append(([part[n] for n in clump], clump))
        except:
            pickle.dump((graph, part, found), pf)
            pf.close()
            return found
    pickle.dump((graph, part, found), pf)
    pf.close()
    return found
def main(): import networkx as nx global GENETIC_CODE, AA_SEQ, AA_LENGTH, NUC_LENGTH, HAMMING_DIST GENETIC_CODE = {'L': ['CTT', 'CTC', 'CTA', 'CTG', 'TTA', 'TTG'], 'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'], 'S': ['TCT', 'TCC', 'TCA', 'TCG'], 'T': ['ACT', 'ACC', 'ACA', 'ACG']} AA_SEQ = 'RLRLRLRLRL' # AA_SEQ = 'RRRR' HAMMING_DIST = 8 AA_LENGTH = len(AA_SEQ) NUC_LENGTH = 3*AA_LENGTH current_opts = [nucs for nucs in GENETIC_CODE[AA_SEQ[0]]] for aa in range(AA_LENGTH-1): new_opts = [] for opt in current_opts: for nucs in GENETIC_CODE[AA_SEQ[len(current_opts[0])/3-1]]: new_opts.append(opt+nucs) current_opts = new_opts[:] print 'there are %i options' % len(current_opts) G = nx.Graph() [G.add_node(a) for a in current_opts] [G.add_edge(a, b) for a in current_opts for b in current_opts if different_enough(a, b)] print 'found %i edges' % len(G.edges()) cliques = [a for a in nx.find_cliques(G)] print 'now printing cliques larger than 4' for clique in cliques: if len(clique) >= 10000: print clique
def getCliques(g):
    netscience_graph = g
    t0 = datetime.datetime.now()
    cliques = list(nx.find_cliques(netscience_graph))
    print datetime.datetime.now() - t0, ' elapsed time.'
    print (len(cliques))
    print cliques[0]
def compareGraphs(G, H, fields=('element',), tolerance=0.5, returnmatching=False):
    # Comparison algorithm based on:
    # "Chemoisosterism in the Proteome", X. Jalencas, J. Mestres, JCIM 2013
    # http://pubs.acs.org/doi/full/10.1021/ci3002974
    if G == H:
        if returnmatching:
            return 1, len(G), [(x, x) for x in G.nodes()]
        else:
            return 1

    if len(G.edges()) == 0 or len(H.edges()) == 0:
        if returnmatching:
            return 0, 0, []
        else:
            return 0

    Gprod = createProductGraph(G, H, tolerance, fields)

    # Calculate the maximal cliques and return the length of the largest one
    maxcliques = np.array(list(nx.find_cliques(Gprod)))
    cllen = np.array([len(x) for x in maxcliques])
    score = (cllen.max() / max(len(G.nodes()), len(H.nodes())))

    if returnmatching:
        return score, cllen.max(), maxcliques[cllen.argmax()]
    else:
        return score
def createR(self): clases = set() cliques = 0 for q in nx.find_cliques(self.G): if (len(q) <3) or (len(q)>6) : continue cliques += 1 tmp_list_sign = self.getSetSignAASeq(q)['list_signature'] self.how_many_signatures[tuple(tmp_list_sign)] += 1 L = ','.join(map(lambda(x):str(x),sorted(tmp_list_sign))) self.osisDictString[L].add(','.join(q)) self.osisDict[L].add(tuple(q)) map(lambda(i):self.osisDictElements[L].add(i),q) rcname = hash(tuple(q)) self.metainfo_node[rcname] = (set(q),tmp_list_sign) self.HG.add_node(rcname) for hn in self.HG.nodes(): if self.metainfo_node[hn][0] & self.metainfo_node[rcname][0]: self.HG.add_edge(hn,rcname) classindex = 0 for K in xrange(3,7): for signa in rcd.setSignatures[K]: self.RCCvector[classindex] = self.how_many_signatures[tuple(signa)] for n in self.HG.nodes(): if self.metainfo_node[n][1] != signa: continue self.RCCvector2[classindex] += self.HG.degree(n) classindex += 1
def f21(self):
    start = 0
    clique_list = list(nx.find_cliques(self.G))
    res = len(clique_list)
    stop = 0
    # self.feature_time.append(stop - start)
    return res
def get_percolated_cliques(G, k):
    percolation_graph = nx.Graph()
    cliques = [frozenset(c) for c in nx.find_cliques(G) if len(c) >= k]
    percolation_graph.add_nodes_from(cliques)

    # First index which nodes are in which cliques
    membership_dict = defaultdict(list)
    for clique in cliques:
        for node in clique:
            membership_dict[node].append(clique)

    def get_adjacent_cliques(clique, membership_dict):
        adjacent_cliques = set()
        for n in clique:
            for adj_clique in membership_dict[n]:
                if clique != adj_clique:
                    adjacent_cliques.add(adj_clique)
        return adjacent_cliques

    # For each clique, see which adjacent cliques percolate
    for clique in cliques:
        for adj_clique in get_adjacent_cliques(clique, membership_dict):
            if len(clique.intersection(adj_clique)) >= (k - 1):
                percolation_graph.add_edge(clique, adj_clique)

    print 'percolation graph nodes:', percolation_graph.nodes()
    print 'percolation graph edges:', percolation_graph.edges()

    # Connected components of clique graph with perc edges
    # are the percolated cliques
    for component in nx.connected_components(percolation_graph):
        yield (frozenset.union(*component))
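# --- Minimal usage sketch for get_percolated_cliques (toy data of my own, not
# --- from the original source; assumes the function above is in scope).
# Two triangles sharing the edge (2, 3) overlap in k-1 = 2 nodes for k=3 and
# therefore percolate into a single community; the isolated edge (5, 6) is
# below the clique-size cutoff and is ignored.
import networkx as nx

G = nx.Graph()
G.add_edges_from([(1, 2), (1, 3), (2, 3), (2, 4), (3, 4), (5, 6)])

for community in get_percolated_cliques(G, 3):
    print sorted(community)   # expected: [1, 2, 3, 4]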
def plotUnweightedCommunities(G, k_clique, n_nodes,iw): cls = nx.find_cliques(G) communities = list(nx.k_clique_communities(G,k_clique ,cliques = cls)) print(len(communities)) pos=nx.graphviz_layout(G) # positions for all nodes plt.figure(figsize=(12,12)) #colors = ["green","yellow","red","blue","pink","orange","gray","brown","black","white","purple","green","yellow","red","blue","pink","orange","gray","brown","black","white","purple"] # colors = ["#ff0000","#ff8000","#ffbf00","#ffff00","#bfff00","#80ff00","#40ff00","#00ff00", "#00ff40", "#00ff80", "#00ffbf", "#00ffff", "#00bfff", "#0080ff", "#0040ff", "#0000ff", "#4000ff", "#8000ff", "#bf00ff", "#ff00ff", "#ff00bf", "#ff0080", "#ff0040","#ff0000","#ff8000","#ffbf00","#ffff00","#bfff00","#80ff00","#40ff00","#00ff00", "#00ff40", "#00ff80", "#00ffbf", "#00ffff", "#00bfff", "#0080ff", "#0040ff", "#0000ff", "#4000ff", "#8000ff", "#bf00ff", "#ff00ff", "#ff00bf", "#ff0080", "#ff0040"] for i in range(len(communities)): nx.draw_networkx_nodes(G,pos,nodelist=list(communities[i]),node_color=colors[i%len(colors)]) nx.draw_networkx_edges(G,pos,width=0.5) # labels nx.draw_networkx_labels(G,pos,font_size=10,font_family='sans-serif') plt.axis('off') plt.savefig("./communities/unweighted_"+"comm_"+"w"+str(iw)+"k"+str(k_clique)+".png") # save as png plt.close()
def calculate_comembership(self, backend=False):
    logging.info("Calculating comembership.")
    nodes = self.graph.nodes()
    n = len(nodes)
    if not backend and n > 500:
        raise network_big.NetworkTooBigException(n)
    cliques = list(find_cliques(self.graph))
    w = {}
    for clique in cliques:
        for node1 in clique:
            for node2 in clique:
                try:
                    w[node1, node2] += 1
                except KeyError:
                    w[node1, node2] = 1
    nodes = w.keys()
    comembership = float(0)
    for node1, node2 in nodes:
        if node1 != node2:
            comembership += w[node1, node2]
    num_nodes = len(self.graph.nodes())
    comembership /= num_nodes * (num_nodes - 1)
    self.measures['comembership'] = comembership
    self.nodesmeasures['comembership'] = None
    self.edgesmeasures['comembership'] = w
def find_cliques(self, g):
    import networkx as nx
    nodes = g.nodes()
    G = nx.Graph()
    G.add_nodes_from(nodes)
    for item1 in nodes:
        for item2 in nodes:
            if not item1 == item2:
                if g.number_of_edges(item1, item2):
                    G.add_edge(item1, item2)
    cliques = [item for item in list(nx.find_cliques(G)) if len(item) >= 3]
    checked_clique = []  # keep only cliques whose nodes all occur together in one sentence
    for cl in cliques:
        flag = False
        for item in self.keywords:
            occur = 0
            for word in cl:
                if word in item:
                    occur = occur + 1
            if occur == len(cl):
                flag = True
        if flag:
            checked_clique.append(cl)
            # print(cl)
    return checked_clique
def plotWeightedCommunities(G, W_lim, k_clique, n_nodes): for i in range(0,n_nodes): for j in range(i,n_nodes): if(i!=j): if(G[i][j]['weight'] < W_lim): G.remove_edge(i,j) cls = nx.find_cliques(G) communities = list(nx.k_clique_communities(G,k_clique ,cliques = cls)) print(len(communities)) pos=nx.graphviz_layout(G) # positions for all nodes plt.figure(figsize=(12,12)) #colors = ["green","yellow","red","blue","pink","orange","gray","brown","black","white","purple","green","yellow","red","blue","pink","orange","gray","brown","black","white","purple"] for i in range(len(communities)): nx.draw_networkx_nodes(G,pos,nodelist=list(communities[i]),node_color=colors[i]) nx.draw_networkx_edges(G,pos,width=0.5) # labels nx.draw_networkx_labels(G,pos,font_size=10,font_family='sans-serif') plt.axis('off') plt.savefig("comm_w_"+str(W_lim)+"k"+str(k_clique)+".png") # save as png plt.close()
def diagonal_bins_to_cliques(args, run_filters, bins):
    G = nx.Graph()
    [G.add_node(a) for a in bins.values()]
    for n1 in G.nodes_iter():
        for n2 in G.nodes_iter():
            if n1.name == n2.name:
                continue
            docs_diff = docs_differ_symmetry(n1.doc_AASeq, n2.doc_AASeq, '1ohz')
            if switches_differ(args, n1.coh_switch, n2.coh_switch) and docs_diff >= args['doc_diff_by']:
                G.add_edge(n1, n2)
            else:
                print('\n')
                print(n1.coh_switch)
                print(n2.coh_switch)
                print(n1.doc_switch)
                print(n2.doc_switch, docs_diff)
    cliques = [a for a in nx.find_cliques(G)]
    max_len = max([len(a) for a in cliques])
    max_cliques = [a for a in cliques if len(a) == max_len]
    print('there are %i cliques with %i structures in each for diff_by=%i doc_diff_by=%i' %
          (len(max_cliques), max_len, args['diff_by'], args['doc_diff_by']))
    return max_cliques
def find_best_clique(sim_mat, size):
    G = nx.Graph()
    for x in xrange(len(sim_mat)):
        G.add_node(x)
    edges = get_sorted_edges(sim_mat)
    x = 0
    thresh = 0.05
    while thresh <= 1:
        while x < len(edges) and edges[x][2] <= thresh:
            G.add_edge(edges[x][0], edges[x][1])
            x += 1
        max_cliques = nx.find_cliques(G)
        # bucket sort
        by_size = collections.defaultdict(list)
        for clique in max_cliques:
            by_size[len(clique)].append(clique)
        biggest = max(by_size.keys())
        if biggest >= size:
            # do tie breaking
            cliques = by_size[biggest]
            best_clique = None
            best_score = 1000000
            for clique in cliques:
                score = max_weight_clique(sim_mat, clique)
                if score < best_score:
                    best_score = score
                    best_clique = clique
            return best_clique
        thresh += 0.05
def create_clique_list(self):
    '''
    a method to create a list of cliques

    Parameters:
        None

    Returns:
        cliques: the list of cliques (list)
        chromatic: the chromatic number (int)
        l_index: the largest clique index (int)
    '''
    g = self.graph.copy()
    chromatic = 0
    l_index = 0
    index = 0
    cliques = []
    while len(g.nodes()) > 0:
        largest = 0
        for clique in nx.find_cliques(g):
            if len(clique) > largest:
                largest = len(clique)
                largest_clique = clique
        clique = []
        for node in largest_clique:
            g.remove_node(node)
            clique.append([node])
        if len(clique) > chromatic:
            chromatic = len(clique)
            largest = clique
            l_index = index
        cliques.append(clique)
        index += 1
    return cliques, chromatic, l_index
def find_cliques(G, min_size):
    """Find all cliques in G above a given size.

    If a node is part of a larger clique, it is deleted from the smaller ones.

    Returns
    -------
    dict
        Mapping nodes to clique ID
    """
    cliques = []
    for K in nx.find_cliques(G):
        if len(K) >= min_size:
            cliques.append(set(K))
    cliques.sort(reverse=True, key=lambda x: len(x))

    L = set()
    for K in cliques:
        K -= L
        L |= K
    cliques = [J for J in cliques if len(J) >= min_size]

    node_to_clique = {}
    for i, K in enumerate(cliques):
        for node in K:
            if node not in node_to_clique:
                node_to_clique[node] = i
    return node_to_clique
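# --- Small usage sketch of the helper above (toy graph of my own; note the
# --- helper shadows networkx's find_cliques, so it must be the module-level
# --- function just defined that is called here).
import networkx as nx

# Two triangles sharing node 3: the first clique claimed keeps all its nodes,
# the other shrinks to {4, 5} and is dropped for falling below min_size.
G = nx.Graph([(1, 2), (1, 3), (2, 3), (3, 4), (4, 5), (3, 5)])
print(find_cliques(G, min_size=3))
# expected: the surviving triangle's nodes mapped to clique ID 0,
# e.g. {1: 0, 2: 0, 3: 0} (which triangle survives depends on enumeration order)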
def get_percolation_graph(): percolation_graph = nx.Graph() cliques = [frozenset(c) for c in nx.find_cliques(G) if len(c) >= k] print 'first max cliques:', cliques percolation_graph.add_nodes_from(cliques) # First index which nodes are in which cliques membership_dict = defaultdict(list) for clique in cliques: for node in clique: membership_dict[node].append(clique) # For each clique, see which adjacent cliques percolate for clique in cliques: def get_adjacent_cliques(clique, membership_dict): adjacent_cliques = set() for n in clique: for adj_clique in membership_dict[n]: if clique != adj_clique: adjacent_cliques.add(adj_clique) return adjacent_cliques for adj_clique in get_adjacent_cliques(clique, membership_dict): if len(clique.intersection(adj_clique)) >= (k - 1): percolation_graph.add_edge(clique, adj_clique) print '\npercolation graph nodes:', percolation_graph.nodes() print 'percolation graph edges:', percolation_graph.edges() return percolation_graph
def find_predicted_cliques(): """ :return:[[member1, member2....], ...] all cliques that are completely predicted and are orthogonal """ coh_doc_purples = creat_coh_doc_purples() G = nx.Graph() all_cohs = list(coh_doc_purples.keys()) all_docs = list(set([doc for coh in all_cohs for doc in coh_doc_purples[coh].keys()])) # [G.add_node((coh, doc)) for coh in all_cohs for doc in all_docs if coh_doc_purples[coh][doc] >= 10] for coh in all_cohs: for doc in all_docs: if doc in coh_doc_purples[coh].keys(): if coh_doc_purples[coh][doc] >= 10: G.add_node((coh, doc)) for coh1, doc1 in G.nodes_iter(): for coh2, doc2 in G.nodes_iter(): if (coh1, doc1) != (coh2, doc2): if doc1 in coh_doc_purples[coh2].keys() and doc2 in coh_doc_purples[coh1].keys(): if coh_doc_purples[coh1][doc2] < 10 and coh_doc_purples[coh2][doc1] < 10: G.add_edge((coh1, doc1), (coh2, doc2)) cliques = list(nx.find_cliques(G)) print('found the following cliques:') for clq in cliques: print(clq, len(clq)) print('the grapg had %i nodes, and %i edges' % (G.number_of_nodes(), G.number_of_edges())) return cliques
def testConnectNodeList(self):
    node_list = [0, 3, 6, 1, 4, 7, 2, 5, 8]
    self.g.connect_node_list(node_list)
    for clique in nx.find_cliques(self.g._graph):
        expect = len(clique)
        break
    self.assertEqual(expect, self.n**2)
def clique_zoeker(graph):
    # Collect all required nodes from the given connections.
    nodes = set([n1 for n1, n2 in graph] + [n2 for n1, n2 in graph])
    # Create a new graph.
    G = nx.Graph()
    # Add all nodes.
    for node in nodes:
        G.add_node(node)
    # Add all edges between nodes (connections).
    for edge in graph:
        G.add_edge(edge[0], edge[1])
    # Search for all cliques.
    clique = nx.find_cliques(G)
    # Put all found cliques in a list.
    result = list(clique)
    # Take the largest clique from the list.
    result = max(result, key=len)
    # Return the largest clique.
    return result
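# --- Quick call sketch for clique_zoeker (hypothetical data, not from the
# --- original source): input is a list of (node, node) connection pairs,
# --- return value is the largest maximal clique.
connections = [('a', 'b'), ('a', 'c'), ('b', 'c'), ('c', 'd')]
print(clique_zoeker(connections))   # e.g. ['a', 'b', 'c'] (node order may vary)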
def clique_graph(G, create_using=None, name=None):
    """Create the maximal clique graph of a graph.

    Finds the maximal cliques and treats these as nodes.
    The nodes are connected if they have common members in
    the original graph.

    Theory has done a lot with clique graphs, but I haven't seen
    much on maximal clique graphs.

    Notes
    -----
    This should be the same as make_clique_bipartite followed
    by project_up, but it saves all the intermediate steps.
    """
    cliq = list(map(set, nx.find_cliques(G)))
    if create_using:
        B = create_using
        B.clear()
    else:
        B = nx.Graph()
    if name is not None:
        B.name = name

    to_node = lambda cl: tuple(sorted(cl))
    for i, cl in enumerate(cliq):
        u = to_node(cl)
        B.add_node(u)
        for j, other_cl in enumerate(cliq[:i]):
            intersect = cl & other_cl
            if intersect:  # Not empty
                B.add_edge(u, to_node(other_cl), weight=len(intersect))
    return B
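# --- Minimal sketch of calling clique_graph on a toy graph (my own example).
# --- Each node of the result is a sorted tuple holding one maximal clique,
# --- and edge weights count the members shared between two cliques.
import networkx as nx

G = nx.Graph([(1, 2), (1, 3), (2, 3), (3, 4), (4, 5), (3, 5)])
B = clique_graph(G)
print(sorted(B.nodes()))          # [(1, 2, 3), (3, 4, 5)]
print(list(B.edges(data=True)))   # one edge with weight 1 (node 3 is shared)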
def get_ego_cliques(ego): ego_cliques_dmp = join(DATA_DIR, 'cliques', 'cliques_%s.zip'%ego) if not os.path.exists(ego_cliques_dmp): print 'Processing cliques: nx.find_cliques, ego:', ego G = load_ego_graph(ego) # this can take some time... # http://pymotw.com/2/zipfile/ with zipfile.ZipFile(ego_cliques_dmp, mode='w') as zf: fileno = 1 ego_cliques = [] for idx, clqs in enumerate(nx.find_cliques(G)): if idx%100000==0 and ego_cliques: _write_cliques_file(zf, fileno, ego_cliques) fileno += 1 ego_cliques = [] ego_cliques.append(clqs) _write_cliques_file(zf, fileno, ego_cliques) ego_cliques = None if False: #ego==5881: print 'In get_ego_cliques, skipping ego', ego else: print 'Loading cliques for ego:', ego with zipfile.ZipFile(ego_cliques_dmp, mode='r') as zf: for f in zf.namelist(): cliques_in_file = json.loads(zf.read(f)) for clique in cliques_in_file: yield clique
def __pes_to_bp_gen_conds_pre_clique_based (es, unf, ev_tab, pre_tab, indep,split_conflicts=False) : for e in es.events : # for all events in e.post, build graph whose edges are # the dependence relation g = networkx.Graph () g.add_nodes_from (e.post) for e1 in e.post : for e2 in e.post : if e1 != e2 and not indep[e1.label, e2.label] : g.add_edge (e1, e2) # for every clique, generate one condition for clique in networkx.find_cliques (g) : # remove events for which there is already condition if not split_conflicts: for ep in [ep for ep in clique if (e, ep) in pre_tab] : clique.remove (ep) if len (clique) == 0 : continue unfpostevs = [ev_tab[ep] for ep in clique] #agrega la condicion if split_conflicts: for unfpost in unfpostevs: c = unf.cond_add (None, [ev_tab[e]], [unfpost]) else: c = unf.cond_add (None, [ev_tab[e]], unfpostevs) for ep in clique : pre_tab[e, ep] = c # events with empty preset will never occurr in previous # search, deal with them separately if len (e.pre) == 0 : if (None, e) not in pre_tab : c = unf.cond_add (None, [], [ev_tab[e]]) pre_tab[None, e] = c return pre_tab
def find_foundations(self, cache=True):
    if cache and isinstance(self._foundations, list):
        return self._foundations
    foundations = list(nx.find_cliques(self))
    foundations = self._reduce_cliques(foundations)
    self._foundations = foundations
    return self._foundations
def nx_cliques(ppis, min_len=3, min_weight=0):
    G = nx.Graph()
    G.add_weighted_edges_from([p[:3] for p in ppis])
    qs = [set(c) for c in nx.find_cliques(G) if len(c) >= min_len]
    if min_weight:
        qs = [q for q in qs if avg_weight(G, q) > min_weight]
    return qs
def find_disjoint_sets(found_sets):
    # uses python graph data structure in which each node is a set
    # edges are created between nodes if the nodes are disjoint sets
    # the maximum clique algorithm is used to calculate the largest collection
    # of disjoint sets

    # initialize graph
    graph = nx.Graph()

    # add all sets as nodes in the graph
    for i in xrange(len(found_sets)):
        graph.add_node(found_sets[i])

    # iterates through each node and adds edges
    for node1 in graph.nodes():
        for node2 in graph.nodes():
            if node1 == node2:
                continue
            if node2 in graph.neighbors(node1):
                continue
            else:
                if is_disjoint(node1, node2):
                    graph.add_edge(node1, node2)

    # use find_cliques function generator to find the max cliques
    max_clique = []
    for clique in nx.find_cliques(graph):
        if len(max_clique) < len(clique):
            max_clique = clique
    return max_clique
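# --- Usage sketch for find_disjoint_sets (my own toy data). The snippet above
# --- relies on an external is_disjoint helper, so a stand-in is defined here
# --- purely to make the example self-contained (an assumption, not the
# --- original implementation). Sets are passed as hashable frozensets so they
# --- can serve as graph nodes.
def is_disjoint(a, b):
    return len(set(a) & set(b)) == 0

found_sets = [frozenset('abc'), frozenset('def'), frozenset('cde'), frozenset('ghi')]
print(find_disjoint_sets(found_sets))
# largest mutually disjoint collection,
# e.g. [frozenset('abc'), frozenset('def'), frozenset('ghi')] (order may vary)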
def __init__(self, points, epsilon, labels=None, distfcn=distance.euclidean):
    self.pts = points
    self.labels = range(len(self.pts)) if labels == None or len(labels) != len(self.pts) else labels
    self.epsilon = epsilon
    self.distfcn = distfcn
    self.network = self.construct_network(self.pts, self.labels, self.epsilon, self.distfcn)
    self.import_simplices(map(tuple, list(nx.find_cliques(self.network))))
def collapsible_patterns(alms, G, context, ref='pcogids', verbose=False, use_taxa=["Old_Burmese", "Burmese", "Written_Burmese", "Rangoon", "Achang_Longchuan", "Xiandao", "Lashi", "Atsi", "Bola", "Maru"]): if [x for x in use_taxa if x not in alms.taxa]: raise ValueError("Your list of taxa contains taxa not in the wordlist.") patterns = defaultdict(list) for node, data in G.nodes(data=True): concept = alms[alms.msa[ref][int(node)]['ID'][0], 'concept'] words = [] msa = alms.msa[ref][int(node)] for i, t in enumerate(use_taxa): if t in msa['taxa']: words += [''.join(msa['seqs'][msa['taxa'].index(t)]).replace('-','')] else: words += ['Ø'] patterns[data['clique']] += [(node, concept, words)] collapsible = defaultdict(list) for pattern, vals in patterns.items(): g = nx.Graph() for n, c, words in vals: collapsible[pattern, tuple(words)] += [(n, c)] g.add_node(n, c=c, w=words) for (n1, c1, words1), (n2, c2, words2) in combinations(vals, r=2): if compatible_columns(words1, words2, gap='Ø') >= 1: g.add_edge(n1, n2) for clique in nx.find_cliques(g): if len(clique) > 1: for n in clique: print(pattern, '{0:4}'.format(n), '{0:22}'.format(g.node[n]['c'][:21]), ' '.join(['{0:6}'.format(x) for x in g.node[n]['w']])) print('--')
bet = nx.betweenness_centrality(facebook_net) bet_sorted = sorted(bet.items(), key=operator.itemgetter(1), reverse=True) print("Top 10 betweenness centrality (node, centrality): ", bet_sorted[0:9]) #Eigenvector centrality top 10 eig = nx.eigenvector_centrality(facebook_net) eig_sorted = sorted(eig.items(), key=operator.itemgetter(1), reverse=True) print("Top 10 eigenvector centrality (node, centrality): ", eig_sorted[0:9]) #Pagerank centrality top 10 pag = nx.pagerank(facebook_net) pag_sorted = sorted(pag.items(), key=operator.itemgetter(1), reverse=True) print("Top 10 pagerank centrality (node, centrality): ", pag_sorted[0:9]) #Trim network to only show nodes with more than 1 connection facebook_net_trimmed = facebook_net.copy() for n in facebook_net_trimmed.nodes(): if deg[n] < 2: facebook_net_trimmed.remove_node(n) #View all cliques cliques = list(nx.find_cliques(facebook_net_trimmed)) print("Cliques:") for c in cliques: print(c) #Export data for use in Gephi nx.write_gexf(facebook_net, "facebook_network.gexf") #Plot Facebook network nx.draw_random(facebook_net) plt.show()
def test_find_cliques2(self):
    hcl = list(nx.find_cliques(self.H))
    assert_equal(sorted(map(sorted, hcl)),
                 [[1, 2], [1, 4, 5, 6], [2, 3], [3, 4, 6]])
data_path = '../../data/clique/simple' + ('_single' if single_place else '') + '.csv' fig_path = '../../results/clique/simple' + ('_single' if single_place else '') + '.png' if fresh_data: # Import data and generate network _, edges = load_airport_and_route(deep_load=True) netx = from_edgelist(edges) N = number_of_nodes(netx) budget = N * balls_per_node net = network(N, graph=netx) print('Data import and network generated') # Find and sort cliques cliques = sorted(find_cliques(netx), key=lambda c: len(c), reverse=True) trial_infections = [] num_cliques = linspace(1, 120, 40).astype(int) for num in num_cliques: simple_cliques(net, num, budget, cliques=cliques, single_place=single_place) trial = run_polya(net, trials=2) trial_infections.append(trial[len(trial) - 1]) else: trial_infections, num_cliques = load_csv_col(data_path, with_headers=True, parse=float,
print "There are ", len(components), "component" for component in components: print "length:", len(component) # <markdowncell> # Plotting Ego Graph of a person. # <codecell> name = "Abhinav Pandey" ego = nx.ego_graph(g, name, radius=1) nx.draw(ego) plot.show() print "Clustering:", nx.clustering(g, name) # <markdowncell> # Calculating Cliques in Graph. These are closed faternity like terrorist organization. # <codecell> clique = nx.find_cliques(g) clique = list(clique) sorted_clique = sorted(clique, key=lambda x: len(x)) sorted_clique[-1] # <codecell> # <codecell>
def fuzzy(threshold, matrix, taxa, method='upgma', revert=False): """ Create fuzzy cluster of a given distance matrix. Parameters ---------- threshold : float The threshold that shall be used for the basic clustering of the data. matrix : list A two-dimensional list containing the distances. taxa : list An list containing the names of all taxa corresponding to the distances in the matrix. method : { "upgma", "single", "complete" } (default="upgma") Select the method for the flat cluster analysis. distances : bool If set to "False", only the topology of the tree will be returned. revert : bool (default=False) Specify whether a reverted dictionary should be returned. Returns ------- cluster : dict A dictionary with cluster-IDs as keys and a list as value, containing the taxa that are assigned to a given cluster-ID. Examples -------- The function is automatically imported along with LingPy. >>> from lingpy import * from lingpy.algorithm import squareform Create a list of arbitrary taxa. >>> taxa = ['German','Swedish','Icelandic','English','Dutch'] Create an arbitrary distance matrix. >>> matrix = squareform([0.5,0.67,0.8,0.2,0.4,0.7,0.6,0.8,0.8,0.3]) >>> matrix [[0.0, 0.5, 0.67, 0.8, 0.2], [0.5, 0.0, 0.4, 0.7, 0.6], [0.67, 0.4, 0.0, 0.8, 0.8], [0.8, 0.7, 0.8, 0.0, 0.3], [0.2, 0.6, 0.8, 0.3, 0.0]] Carry out the fuzzy flat cluster analysis. >>> fuzzy(0.5,matrix,taxa) {1: ['Swedish', 'Icelandic'], 2: ['Dutch', 'German'], 3: ['Dutch', 'English']} Notes ----- This is a very simple fuzzy clustering algorithm. It basically does nothing else than removing taxa successively from the matrix, flat-clustering the remaining taxa with the corresponding threshold, and then returning a combined "consensus" cluster in which taxa may be assigned to multiple clusters. See also -------- link_clustering """ g = nx.Graph() for taxon in taxa: g.add_node(taxon) for idx, taxon in enumerate(taxa): new_matrix = [] for i, line in enumerate(matrix): for j, cell in enumerate(line): if i < j and i != idx and j != idx: new_matrix += [cell] new_matrix = misc.squareform(new_matrix) clusters = cluster.flat_cluster( method, threshold, new_matrix, [t for t in taxa if t != taxon]) for clr in clusters: for tA, tB in util.combinations2(clusters[clr]): if not g.has_edge(tA, tB): g.add_edge(tA, tB, weight=1) else: g[tA][tB]['weight'] += 1 out = {i + 1: c for i, c in enumerate(nx.find_cliques(g))} if revert: new_out = defaultdict(list) for key, val in out.items(): for v in val: new_out[v].append(key) return new_out return out
N = [] GT = nx.DiGraph() R = [] seen = set() rhs_dict = {} for key in sorted(T.keys()): # backwards version print "================================================" print "KEY:", key, T[key] print "================================================" subgraph = nx.Graph(T[key]) # for clique in nx.find_cliques(subgraph): while True: clique = [] for x in nx.find_cliques(subgraph): if len(x) > 1: clique = x break if len(clique) == 0: break sg = G.subgraph(clique) clique_str = ','.join(str(y) for y in sorted(clique)) N.append(clique_str) G.remove_edges_from(sg.edges()) subgraph.remove_edges_from(sg.edges()) subgraph.number_of_edges() # of the edges I just removed.. did I create any singletons that need to be grammarred singletons = [] for n in clique:
def test_directed(self):
    with pytest.raises(nx.NetworkXNotImplemented):
        next(nx.find_cliques(nx.DiGraph()))
features = pd.read_csv('../data/speaq_results/features.csv').iloc[:, 1:] MAX_CLIQUES = 100000 PEARSON_CORRELATION_THRESHOLDS = np.arange(.5, .95, .05) MINIMUM_PERCENTAGE_OF_PEAKS_MATCHED_THRESHOLDS = np.arange(.3, .9, .1) results = [] peak_db = generate_db(data_dir) for pearson_thresh in PEARSON_CORRELATION_THRESHOLDS: corr_mat = features.corr('spearman') g = nx.Graph(corr_mat > pearson_thresh) cliqs = [] for idx, cliq in enumerate(nx.find_cliques(g)): if idx > MAX_CLIQUES: raise Exception(f"More than {MAX_CLIQUES} cliques found") cliqs.append(cliq) logger.info( f"pearson_thresh: {pearson_thresh:.2f} -- identified {len(cliqs)} cliques" ) for missing_thresh in MINIMUM_PERCENTAGE_OF_PEAKS_MATCHED_THRESHOLDS: found = False identified_metabolites = {} for idx, cliq in enumerate(cliqs): res = peak_db.query_n(peakset2ppm(cliq, filled), missing_thresh=missing_thresh) for qs in res:
### Disparity calculation ###
disp_c = disparity(imgL_c, imgR_c)
disp_p = disparity(imgL_p, imgR_p)

### finding 3D coord ###
tri_c = triangulation(disp_c, data[:, 2:])
tri_p = triangulation(disp_p, data[:, :2])

# finding index of inliers
th = .2
dist_c = scipy.spatial.distance.cdist(tri_c, tri_c, 'sqeuclidean')
dist_p = scipy.spatial.distance.cdist(tri_p, tri_p, 'sqeuclidean')
mask = (abs(dist_c - dist_p) < th).astype('uint8')
G = nx.from_numpy_matrix(mask)
list_cliq = list(find_cliques(G))
length = np.asarray([len(i) for i in list_cliq])
max_cliq_node = list_cliq[np.argmax(length)]

#### inliers ####
world_c_h = tri_c[max_cliq_node]
world_p_h = tri_p[max_cliq_node]
img_c = data[:, 2:][max_cliq_node]
img_p = data[:, :2][max_cliq_node]
img_p_h = np.concatenate((img_p, np.ones((len(img_p), 1))), axis=1)
img_c_h = np.concatenate((img_c, np.ones((len(img_c), 1))), axis=1)
print(img_c_h.shape)

######## feeding in random idx ########
''' Finding cliques (I) You're now going to practice finding cliques in G. Recall that cliques are "groups of nodes that are fully connected to one another", while a maximal clique is a clique that cannot be extended by adding another node in the graph. Instructions 100xp Count the number of maximal cliques present in the graph and print it. Use the nx.find_cliques() function of G to find the maximal cliques. The nx.find_cliques() function returns a generator object. To count the number of maximal cliques, you need to first convert it to a list with list() and then use the len() function. Place this inside a print() function to print it. Take Hint (-30xp) ''' # Calculate the maximal cliques in G: cliques cliques = nx.find_cliques(G) # Count and print the number of maximal cliques in G print(len(list(cliques))) ''' Finding cliques (II) Great work! Let's continue by finding a particular maximal clique, and then plotting that clique. Instructions 100xp Find the author(s) that are part of the largest maximal clique, and plot the subgraph of that/one of those clique(s) using a CircosPlot. To do this: Use the nx.find_cliques() function to calculate the maximal cliques in G. Place this within the provided sorted() function to calculate the largest maximal clique. Create the subgraph consisting of the largest maximal clique using the .subgraph() method and largest_clique.
def to_junction_tree(self): """ Creates a junction tree (or clique tree) for a given markov model. For a given markov model (H) a junction tree (G) is a graph 1. where each node in G corresponds to a maximal clique in H 2. each sepset in G separates the variables strictly on one side of the edge to other. Examples -------- >>> from pgmpy.models import MarkovModel >>> from pgmpy.factors.discrete import DiscreteFactor >>> mm = MarkovModel() >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']) >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'), ... ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'), ... ('x4', 'x7'), ('x5', 'x7')]) >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()] >>> mm.add_factors(*phi) >>> junction_tree = mm.to_junction_tree() """ from pgmpy.models import JunctionTree # Check whether the model is valid or not self.check_model() # Triangulate the graph to make it chordal triangulated_graph = self.triangulate() # Find maximal cliques in the chordal graph cliques = list(map(tuple, nx.find_cliques(triangulated_graph))) # If there is only 1 clique, then the junction tree formed is just a # clique tree with that single clique as the node if len(cliques) == 1: clique_trees = JunctionTree() clique_trees.add_node(cliques[0]) # Else if the number of cliques is more than 1 then create a complete # graph with all the cliques as nodes and weight of the edges being # the length of sepset between two cliques elif len(cliques) >= 2: complete_graph = UndirectedGraph() edges = list(itertools.combinations(cliques, 2)) weights = list( map(lambda x: len(set(x[0]).intersection(set(x[1]))), edges)) for edge, weight in zip(edges, weights): complete_graph.add_edge(*edge, weight=-weight) # Create clique trees by minimum (or maximum) spanning tree method clique_trees = JunctionTree( nx.minimum_spanning_tree(complete_graph).edges()) # Check whether the factors are defined for all the random variables or not all_vars = itertools.chain( *[factor.scope() for factor in self.factors]) if set(all_vars) != set(self.nodes()): ValueError( 'DiscreteFactor for all the random variables not specified') # Dictionary stating whether the factor is used to create clique # potential or not # If false, then it is not used to create any clique potential is_used = {factor: False for factor in self.factors} for node in clique_trees.nodes(): clique_factors = [] for factor in self.factors: # If the factor is not used in creating any clique potential as # well as has any variable of the given clique in its scope, # then use it in creating clique potential if not is_used[factor] and set(factor.scope()).issubset(node): clique_factors.append(factor) is_used[factor] = True # To compute clique potential, initially set it as unity factor var_card = [self.get_cardinality()[x] for x in node] clique_potential = DiscreteFactor(node, var_card, np.ones(np.product(var_card))) # multiply it with the factors associated with the variables present # in the clique (or node) # Thanh Dat if len(clique_factors) > 0: clique_potential *= factor_product(*clique_factors) clique_trees.add_factors(clique_potential) if not all(is_used.values()): raise ValueError( 'All the factors were not used to create Junction Tree.' 'Extra factors are defined.') return clique_trees
cnt = 0 for i, adj_list in tqdm(enumerate(data.adj_node.values)): #for i,adj_list in enumerate(data.adj_node.values): edges = [(i, item) for item in adj_list if item != i] if edges == []: # if cnt%100000==0: # print("ignore cnts %d" %cnt) cnt += 1 else: G.add_edges_from(edges) print("ignore cnts %d" % cnt) print('begin train') cnt = 0 max_clique = np.zeros(data.shape[0]) for clique in tqdm(nx.find_cliques(G)): #for clique in nx.enumerate_all_cliques(G): if cnt % 100000 == 0: print("deal cnts %d" % cnt) len_clique = len(clique) for item in clique: max_clique[item] = max(max_clique[item], len_clique) cnt += 1 print("totally max_clique %d" % cnt) n_train = train.shape[0] train_clique_data = max_clique[0:n_train] test_clique_data = max_clique[n_train:] pd_train_clique = pd.DataFrame(train_clique_data,
import networkx as nx
import matplotlib.pyplot as plt

graph = nx.karate_club_graph()  # Graph about some karate club

# Searching for and printing all cliques of size 4 or more
cliques = nx.find_cliques(graph)
print('Cliques of size 4 or more : %s' % [c for c in cliques if len(c) >= 4])

# Uniting cliques into communities
#communities = nx.k_clique_communities(graph, k=4)
#communities_list = [list(c) for c in communities]
#nodes_list = [node for community in communities_list for node in communities]
#print('Finding next communities :%s' % communities_list)

# Drawing the result
#subgraph = graph.subgraph(nodes_list)
pos = nx.spring_layout(graph)  # Lay out the graph with the Fruchterman-Reingold force-directed algorithm
nx.draw(graph, pos=pos, with_labels=True)
plt.show()
# pagerank concept
import random

N = 7
G = nx.erdos_renyi_graph(20, 0.2)
for (start, end) in G.edges:
    G.edges[start, end]['weight'] = random.random()
t = nx.pagerank(G)
print(t)
nx.draw(G, with_labels=True)
plt.show()

#%%
# connected component
G = nx.erdos_renyi_graph(20, 0.1)
print(list(nx.find_cliques(G)))
t = sorted(list(nx.connected_components(G)), key=lambda x: len(x))[-1]
print(t)
L = nx.subgraph(G, t)
nx.draw(L)
plt.show()

t = nx.connected_components(G)
i = 1
for n in t:
    for k in n:
        G.nodes[k]['grouping'] = i
        G.nodes[k]['alphabetically'] = i
    i += 1
print(percentile(list(deg.values()), 25))  # first quartile (25th percentile)
print(median(list(deg.values())))
print(percentile(list(deg.values()), 75))
print(max(list(deg.values())))

# we can choose to only select the characters that have a degree > 10
# so these are relatively main characters
Gt = G.copy()
dn = nx.degree(Gt)
for n in Gt.nodes():
    if dn[n] <= 10:
        Gt.remove_node(n)
nx.draw_networkx(Gt, node_size=0, edge_color='b', alpha=.2, font_size=12)
show()

# so here we are observing the most relevant characters and their relationships
# we can also find cliques
from networkx import find_cliques
cliques = list(find_cliques(G))
print(max(cliques, key=lambda l: len(l)))

"""
other resources
opencv
pandas
scipy
statsmodels
nltk
ipython
"""
def maximum_clique_exact_solve_np_hard(G_in):
    max_clique_number = nx.graph_clique_number(G_in)
    cliques = nx.find_cliques(G_in)
    for cl in cliques:
        if len(cl) == max_clique_number:
            return cl
min_mean_q, max_mean_q, min_std_q, max_std_q, )) c += 1 end = datetime.now() print 'times:', end - start G = prepare_graph([path + 'train.csv']) cnt = 0 max_clique = dict() for clique in nx.find_cliques(G): if cnt % 100000 == 0: print("deal cnts %d" % cnt) len_clique = len(clique) for item in clique: c = max_clique.get(item, []) c.append(len_clique) max_clique[item] = c cnt += 1 prepare_clique_stats(path + 'train.csv', path + 'train_clique_stats_sep.csv', max_clique) G = prepare_graph([path + 'train.csv', path + 'test.csv']) cnt = 0 max_clique = dict()
graph theory week4 notebook
"""
import networkx as nx
import pygraphviz as pgv
from nxpd import draw, nxpdParams

nxpdParams['show'] = 'ipynb'

G = nx.Graph()
G.add_edges_from([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e'), ('e', 'a'),
                  ('f', 'a'), ('g', 'b'), ('h', 'c'), ('i', 'd'), ('j', 'e'),
                  ('f', 'h'), ('h', 'j'), ('j', 'g'), ('g', 'i'), ('i', 'f'),
                  ('j', 'i'), ('j', 'd'), ('d', 'g')])
draw(G, layout='circo')

cliques = nx.find_cliques(G)
max_clique = G.nodes()[0]
for c in cliques:
    if (len(c) > len(max_clique)):
        max_clique = c
for v in max_clique:
    G.node[v]['color'] = 'red'
    for u in max_clique:
        if u != v:
            G[u][v]['color'] = 'red'
draw(G, layout='circo')
                 a[Z] - b[Z])  # function returns manhattan dist

# part 1
largest_radius_bot = max(bots, key=lambda bot: bot[RADIUS])
print('part 1:',
      sum((manhattan(largest_radius_bot, bot) <= largest_radius_bot[RADIUS]) for bot in bots))

# part 2
# build a graph with edges between overlapping nanobots
graph = nx.Graph()
for bot in bots:
    # two bots overlap if their distance is smaller or equal than the sum of their ranges
    overlaps = [(bot, other) for other in bots
                if manhattan(bot, other) <= bot[RADIUS] + other[RADIUS]]
    graph.add_edges_from(overlaps)

# find sets of overlapping nanobots (i.e. fully-connected sub-graphs)
cliques = list(nx.find_cliques(graph))
cliques_size = [len(c) for c in cliques]
assert len([
    s for s in cliques_size if s == max(cliques_size)
]) == 1  # currently no tie breaking check so make sure it doesn't matter
clique = max(cliques, key=len)  # keep largest clique

# calculate the point in the nanobots' radii which is closest to the origin - greedy strategy - not confident as general
points = [manhattan(ORIGIN, bot) - bot[RADIUS] for bot in clique]
# furthest away point in points needed to get all bots in the clique but closest to origin
print('part 2:', max(points))
print("--------------------------------") N = 100 print("3. Extracting a subgraph on {} random nodes (just a test).".format(N)) G1 = G.subgraph(np.random.choice(G.nodes(), N)) print("Done.") #print("HACK! Replacing graph by subgraph"); G = G1 print("--------------------------------") print("4. Looking for maximal cliques in the subgraph.") C = list(nx.find_cliques(G1)) print("Done.") cc = [len(c) for c in C] (h, _) = np.histogram(cc, bins=range(1, 10)) print("Found: {} cliques.".format(len(cc))) print("Histogram of clique size:", h) print("--------------------------------") print("5. Looking for maximal cliques in the whole graph.") C = list(nx.find_cliques(G)) pickle.dump({'C': C}, open(output_filename_maxcliques, "wb")) print("Done.")
def run_cycle(self, xds_ascii_files, reference_idx=None): if len(xds_ascii_files) == 0: print >> self.out, "Error: no files given." return xscale_inp = os.path.join(self.workdir, "XSCALE.INP") xscale_lp = os.path.join(self.workdir, "XSCALE.LP") # Get averaged cell for scaling sg, cell, lcv, alcv = self.average_cells(xds_ascii_files) self.cell_info_at_cycles[self.get_last_cycle_number()] = (cell, lcv, alcv) # Choose directory containing XDS_ASCII.HKL and set space group (but how??) inp_out = open(xscale_inp, "w") inp_out.write("! This XSCALE.INP is generated by kamo.multi_merge.\n") inp_out.write( "! You may want to use yamtbx.run_xscale to re-run xscale by yourself\n" ) inp_out.write( "! because number of characters in line may exceed the limit of xscale.\n" ) inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc) inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (sg, cell)) inp_out.write(self.xscale_inp_head) for i, xds_ascii in enumerate(xds_ascii_files): f = self.altfile.get(xds_ascii, xds_ascii) tmp = min(os.path.relpath(f, self.workdir), f, key=lambda x: len(x)) refstr = "*" if i == reference_idx else " " inp_out.write(" INPUT_FILE=%s%s\n" % (refstr, tmp)) if self.d_max is not None: d_range = (float("inf") if self.d_max is None else self.d_max, 0. if self.d_min is None else self.d_min) inp_out.write(" INCLUDE_RESOLUTION_RANGE= %.4f %.4f\n" % d_range) if len(self.xscale_params.corrections) != 3: inp_out.write(" CORRECTIONS= %s\n" % " ".join(self.xscale_params.corrections)) if (self.xscale_params.frames_per_batch, self.xscale_params.degrees_per_batch).count(None) < 2: xactmp = XDS_ASCII(f, read_data=False) frame_range = xactmp.get_frame_range() osc_range = xactmp.osc_range nframes = frame_range[1] - frame_range[0] + 1 if self.xscale_params.frames_per_batch is not None: nbatch = int( numpy.ceil(nframes / self.xscale_params.frames_per_batch)) else: nbatch = int( numpy.ceil(nframes / self.xscale_params.degrees_per_batch * osc_range)) print >> self.out, "frame range of %s is %d,%d setting NBATCH= %d" % ( f, frame_range[0], frame_range[1], nbatch) inp_out.write(" NBATCH= %d\n" % nbatch) inp_out.close() print >> self.out, "DEBUG:: running xscale with %3d files.." % len( xds_ascii_files) try: xscale.run_xscale(xscale_inp, cbf_to_dat=True, aniso_analysis=True, use_tmpdir_if_available=self.xscale_params. use_tmpdir_if_available) except: print >> self.out, traceback.format_exc() xscale_log = open(xscale_lp).read() if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log: print >> self.out, "DEBUG:: Need to choose files." # From XDS ver. March 1, 2015, it kindly informs which dataset has no common reflections. # ..but does not print the table. Sometimes only one dataset is left. Should we make table by ourselves? # Older versions just print correlation table and stop. if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log: G = xscalelp.construct_data_graph(xscale_lp, min_common_refs=10) #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot")) cliques = [c for c in nx.find_cliques(G)] cliques.sort(key=lambda x: len(x)) if self._counter == 1: max_clique = cliques[-1] else: idx_prevfile = 1 if self.reference_file else 0 max_clique = filter( lambda x: idx_prevfile in x, cliques)[-1] # xscale.hkl must be included! 
if self.reference_file: max_clique = [ 0, ] + filter(lambda x: x != 0, max_clique) for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f)) try_later = map( lambda i: xds_ascii_files[i], filter(lambda x: x not in max_clique, G.nodes())) print >> self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % ( len(max_clique), len(try_later)) print >> self.out, "DEBUG:: %d files are of no use." % ( len(xds_ascii_files) - len(G.nodes())) for i in filter(lambda j: j not in G.nodes(), xrange(len(xds_ascii_files))): self.removed_files.append(xds_ascii_files[i]) self.removed_reason[xds_ascii_files[i]] = "no_common_refls" self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique)) assert len( try_later ) <= 0 # Never be the case with newer xscale!! (if the case, check_remove_list() should be modified to skip_num+=1 if len(try_later) > 0: print >> self.out, "Trying to merge %d remaining files.." % len( try_later) next_files = [os.path.join(self.workdir, "xscale.hkl") ] + try_later if self.reference_file: next_files = [ self.reference_file, ] + next_files self.workdir = self.request_next_workdir() self.run_cycle(next_files) return else: bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp) print >> self.out, "DEBUG:: %d files are of no use." % ( len(bad_idxes)) for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f)) # XXX Actually, not all datasets need to be thrown.. some of them are useful.. for i in bad_idxes: self.removed_files.append(xds_ascii_files[i]) self.removed_reason[xds_ascii_files[i]] = "no_common_refls" self.run_cycle( map( lambda i: xds_ascii_files[i], filter(lambda j: j not in bad_idxes, xrange(len(xds_ascii_files))))) return elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log: print >> self.out, "DEBUG:: Need to discard useless data." unuseful_data = [ xscalelp.get_read_data(xscale_lp)[-1] ] #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp)) if len(unuseful_data) == 0: print >> self.out, "I don't know how to fix it.." return remove_idxes = map(lambda x: x[0] - 1, unuseful_data) remove_idxes = self.check_remove_list(remove_idxes) keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files))) for i in remove_idxes: self.removed_files.append(xds_ascii_files[i]) self.removed_reason[xds_ascii_files[i]] = "useless" for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f)) self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes)) return elif "INACCURATE SCALING FACTORS." in xscale_log: # Actually I don't know how to fix this.. (bug?) but worth proceeding (discarding bad data may solve problem). print >> self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored." elif "!!! ERROR !!!" in xscale_log: print >> self.out, "Unknown error! please check the XSCALE.LP and fix the program." 
return # Re-scale by changing reference rescale_for = None if len(self.reject_method) == 0: rescale_for = self.reference_choice # may be None elif reference_idx is None: rescale_for = "bmed" if rescale_for is not None and len(xds_ascii_files) > 1: ref_num = xscale.decide_scaling_reference_based_on_bfactor( xscale_lp, rescale_for, return_as="index") if reference_idx != ref_num: print >> self.out, "Rescaling with %s" % rescale_for for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f)) self.run_cycle(xds_ascii_files, reference_idx=ref_num) if len(self.reject_method) == 0: return # Remove bad data remove_idxes = [] remove_reasons = {} if self.reject_method[0] == "framecc": print >> self.out, "Rejections based on frame CC" from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged # list of [frame, n_all, n_common, cc] in the same order framecc = xscale_cc_against_merged.run( hklin=os.path.join(self.workdir, "xscale.hkl"), output_dir=self.workdir).values() if self.reject_params.framecc.method == "tukey": ccs = numpy.array( map(lambda x: x[3], reduce(lambda x, y: x + y, framecc))) ccs = ccs[ccs == ccs] # Remove nan q25, q75 = numpy.percentile(ccs, [25, 75]) cc_cutoff = q25 - self.reject_params.framecc.iqr_coeff * (q75 - q25) print >> self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % ( cc_cutoff, self.reject_params.framecc.iqr_coeff) else: cc_cutoff = self.reject_params.framecc.abs_cutoff print >> self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff for i, cclist in enumerate(framecc): useframes = map(lambda x: x[0], filter(lambda x: x[3] > cc_cutoff, cclist)) if len(useframes) == 0: remove_idxes.append(i) remove_reasons.setdefault(i, []).append("allbadframe") continue f = xds_ascii_files[i] xac = XDS_ASCII(f) if set(useframes).issuperset( set(range(min(xac.iframe), max(xac.iframe)))): continue # All useful frames. sel = xac.iframe == useframes[0] for x in useframes[1:]: sel |= xac.iframe == x if sum(sel) < 10: # XXX care I/sigma remove_idxes.append(i) remove_reasons.setdefault(i, []).append("allbadframe") continue print >> self.out, "Extracting frames %s out of %d-%d in %s" % ( ",".join(map(str, useframes)), min( xac.iframe), max(xac.iframe), f) newf = self.request_file_modify(f) xac.write_selected(sel, newf) self.reject_method.pop(0) # Perform only once elif self.reject_method[0] == "lpstats": if "bfactor" in self.reject_params.lpstats.stats: iqrc = self.reject_params.lpstats.iqr_coeff print >> self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc Bs = numpy.array( map(lambda x: x[1], xscalelp.get_k_b(xscale_lp))) if len(Bs) > 1: # If one data, K & B table is not available. q25, q75 = numpy.percentile(Bs, [25, 75]) iqr = q75 - q25 lowlim, highlim = q25 - iqrc * iqr, q75 + iqrc * iqr count = 0 for i, b in enumerate(Bs): if b < lowlim or b > highlim: remove_idxes.append(i) remove_reasons.setdefault(i, []).append("bad_B") count += 1 print >> self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed" % ( count, lowlim, highlim) else: print >> self.out, " B-factor outlier rejection is not available." 
if "em.b" in self.reject_params.lpstats.stats: iqrc = self.reject_params.lpstats.iqr_coeff print >> self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc bs = numpy.array( map(lambda x: x[1], xscalelp.get_ISa(xscale_lp))) q25, q75 = numpy.percentile(bs, [25, 75]) iqr = q75 - q25 lowlim, highlim = q25 - iqrc * iqr, q75 + iqrc * iqr count = 0 for i, b in enumerate(bs): if b < lowlim or b > highlim: remove_idxes.append(i) remove_reasons.setdefault(i, []).append("bad_em.b") count += 1 print >> self.out, " %4d error model b outliers (<%.2f, >%.2f) removed" % ( count, lowlim, highlim) if "em.ab" in self.reject_params.lpstats.stats: iqrc = self.reject_params.lpstats.iqr_coeff print >> self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc vals = numpy.array( map(lambda x: x[0] * x[1], xscalelp.get_ISa(xscale_lp))) q25, q75 = numpy.percentile(vals, [25, 75]) iqr = q75 - q25 lowlim, highlim = q25 - iqrc * iqr, q75 + iqrc * iqr count = 0 for i, ab in enumerate(vals): if ab < lowlim or ab > highlim: remove_idxes.append(i) remove_reasons.setdefault(i, []).append("bad_em.ab") count += 1 print >> self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed" % ( count, lowlim, highlim) if "rfactor" in self.reject_params.lpstats.stats: iqrc = self.reject_params.lpstats.iqr_coeff print >> self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc rstats = xscalelp.get_rfactors_for_each(xscale_lp) vals = numpy.array(map(lambda x: rstats[x][-1][1], rstats)) # Read total R-factor q25, q75 = numpy.percentile(vals, [25, 75]) iqr = q75 - q25 lowlim, highlim = q25 - iqrc * iqr, q75 + iqrc * iqr count = 0 for i, v in enumerate(vals): if v < lowlim or v > highlim: remove_idxes.append(i) remove_reasons.setdefault(i, []).append("bad_R") count += 1 print >> self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed" % ( count, lowlim, highlim) if "pairwise_cc" in self.reject_params.lpstats.stats: corrs = xscalelp.get_pairwise_correlations(xscale_lp) if self.reject_params.lpstats.pwcc.method == "tukey": q25, q75 = numpy.percentile(map(lambda x: x[3], corrs), [25, 75]) iqr = q75 - q25 lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr print >> self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % ( lowlim, iqr) else: lowlim = self.reject_params.lpstats.pwcc.abs_cutoff print >> self.out, "Rejections based on pairwise_cc < %.4f" % lowlim bad_corrs = filter(lambda x: x[3] < lowlim, corrs) idx_bad = {} for i, j, common_refs, corr, ratio, bfac in bad_corrs: idx_bad[i] = idx_bad.get(i, 0) + 1 idx_bad[j] = idx_bad.get(j, 0) + 1 idx_bad = idx_bad.items() idx_bad.sort(key=lambda x: x[1]) count = 0 for idx, badcount in reversed(idx_bad): remove_idxes.append(idx - 1) remove_reasons.setdefault(idx - 1, []).append("bad_pwcc") bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs) if len(bad_corrs) == 0: break fun_key = lambda x: x[3] print >> self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % ( idx, min(bad_corrs, key=fun_key)[3], max(bad_corrs, key=fun_key)[3], len(bad_corrs)) count += 1 print >> self.out, " %4d pairwise CC outliers removed" % count self.reject_method.pop(0) # Perform only once elif self.reject_method[0] == "delta_cc1/2": print >> self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin table = xscalelp.read_stats_table(xscale_lp) i_stat = -1 if self.delta_cchalf_bin == "total" else -2 prev_cchalf = table["cc_half"][i_stat] prev_nuniq = table["nuniq"][i_stat] # file_name->idx table remaining_files = 
collections.OrderedDict( map(lambda x: x[::-1], enumerate(xds_ascii_files))) # For consistent resolution limit inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % ( sg, cell) count = 0 for i in xrange(len(xds_ascii_files) - 1): # if only one file, cannot proceed. tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i) cchalf_list = xscale.calc_cchalf_by_removing( wdir=tmpdir, inp_head=inp_head, inpfiles=remaining_files.keys(), stat_bin=self.delta_cchalf_bin, nproc=self.nproc, nproc_each=self.nproc_each, batchjobs=self.batchjobs) rem_idx, cc_i, nuniq_i = cchalf_list[ 0] # First (largest) is worst one to remove. rem_idx_in_org = remaining_files[remaining_files.keys() [rem_idx]] # Decision making by CC1/2 print >> self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % ( i, rem_idx_in_org, cc_i, nuniq_i, prev_cchalf, prev_nuniq) if cc_i * nuniq_i <= prev_cchalf * prev_nuniq: break print >> self.out, "Removing idx= %3d gained CC1/2 by %.2f" % ( rem_idx_in_org, cc_i - prev_cchalf) prev_cchalf, prev_nuniq = cc_i, nuniq_i remove_idxes.append(rem_idx_in_org) remove_reasons.setdefault(rem_idx_in_org, []).append("bad_cchalf") del remaining_files[remaining_files.keys() [rem_idx]] # remove file from table count += 1 print >> self.out, " %4d removed by DeltaCC1/2 method" % count if self.next_delta_cchalf_bin != []: self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0) else: self.reject_method.pop(0) else: print >> self.out, "ERROR:: Unsupported reject_method (%s)" % reject_method # Remove duplicates remove_idxes = list(set(remove_idxes)) remove_idxes = self.check_remove_list(remove_idxes) if len(remove_idxes) > 0: print >> self.out, "DEBUG:: Need to remove %d files" % len( remove_idxes) for i in sorted(remove_idxes): print >> self.out, " %.3d %s" % (i, xds_ascii_files[i]) self.removed_files.append(xds_ascii_files[i]) self.removed_reason[xds_ascii_files[i]] = ",".join( remove_reasons[i]) # Next run keep_idxes = filter(lambda x: x not in remove_idxes, xrange(len(xds_ascii_files))) if len(self.reject_method) > 0 or len(remove_idxes) > 0: self.workdir = self.request_next_workdir() self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes)) elif self.reference_choice is not None and len(keep_idxes) > 1: # Just re-scale with B reference ref_num = xscale.decide_scaling_reference_based_on_bfactor( xscale_lp, self.reference_choice, return_as="index") if reference_idx != ref_num: print >> self.out, "Rescaling2 with %s" % self.reference_choice for f in "XSCALE.INP", "XSCALE.LP": util.rotate_file(os.path.join(self.workdir, f)) self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes), reference_idx=ref_num)
''' Finding cliques (II) Great work! Let's continue by finding a particular maximal clique, and then plotting that clique. Instructions 100 XP Find the author(s) that are part of the largest maximal clique, and plot the subgraph of that/one of those clique(s) using a CircosPlot. To do this: Use the nx.find_cliques() function to calculate the maximal cliques in G. Place this within the provided sorted() function to calculate the largest maximal clique. Create the subgraph consisting of the largest maximal clique using the .subgraph() method and largest_clique. Create the CircosPlot object using the subgraph G_lc (without any other arguments) and plot it. ''' SOLUTION # Import necessary modules import networkx as nx from nxviz import CircosPlot import matplotlib.pyplot as plt # Find the author(s) that are part of the largest maximal clique: largest_clique largest_clique = sorted(nx.find_cliques(G), key=lambda x: len(x))[-1] # Create the subgraph of the largest_clique: G_lc G_lc = G.subgraph(largest_clique) # Create the CircosPlot object: c c = CircosPlot(G_lc) # Draw the CircosPlot to the screen c.draw() plt.show()
fin = open("GameOfThrones.txt", 'rb') G = nx.read_edgelist('GameOfThrones.txt', nodetype=str, delimiter=",", data=(("weight", int), ("season", int))) fin.close() drawGraph(G) ''' ################################################ Step 2 output: a) No. of maximal cliques b) Size of largest maximal clique c) No. of maximal cliques of the largest size ##################################################''' maxCliques = nx.find_cliques(G) biggestClique = 0 numBiggestCliques = 0 ctr = 0 for clique in maxCliques: ctr += 1 if (len(clique) > biggestClique): biggestClique = len(clique) print("Number of Maximal Cliques: " + str(ctr)) print("Size of Largest Maximal Clique: " + str(biggestClique)) #Could only loop through generator once, so just remake it maxCliques = nx.find_cliques(G) ctr = 0
c += 1 end = datetime.now() print 'times:', end - start G = prepare_graph([ path + 'train_unigram.csv', path + 'test_unigram.csv', ]) count = 0 d_nodes = dict() d_edges = dict() keywords = [] for c in nx.find_cliques(G): l_nodes = len(c) # l_edges = len(c.edges()) if l_nodes <= 2: continue start = True key = [] for node in c: if start: key = set(node.split(' ')) start = False else: key = set(node.split(' ')).intersection(key) if len(key) == 0: break # print(key)
def construct(DATA, SENTIDIFF_THRES):
    """
    :param DATA: a DataFrame with columns 'Person', 'Topic', 'Sentiment', and 'Speech'.
    :param SENTIDIFF_THRES: threshold on the absolute sentiment difference below which
                            two speeches on the same topic count as an agreement.
    :return: the weighted graph G, the centrality DataFrame, and the list of maximal cliques.
    """
    # ================================================================================
    # ----- FOR DEBUGGING
    # TIME_FRAME = '2017'
    # METHOD = 'nmf'
    # PATH = f"results/"
    # PARAMETERS
    # text = pd.read_csv(f"{PATH}{TIME_FRAME}/ssm_results_{TIME_FRAME}.csv")
    # thresholds = pd.read_csv(f"{PATH}")
    # ================================================================================

    # ----- Construct Weighted Graph
    startTime = tm.perf_counter()
    G = nx.Graph()
    G.clear()

    # ----- Add nodes
    print('\nAdding nodes for graph...')
    for i in DATA.index:
        row = DATA.loc[i]
        person = row['Person']

        # Only add actor if the actor hasn't already been added
        if not G.has_node(person):
            # Construct dataFrame for text attribute of node
            # Extract all text from the actor
            data = DATA[DATA['Person'] == person]
            data.index = range(len(data))

            # Add node with its corresponding attributes
            G.add_node(
                person,
                gender=row['Gender'],
                party=row['Party'],
                metro=row['Metro'],
                data=data
            )

        # Print progress...
        if i % 50 == 0:
            print(f"{i:{5}} of {len(DATA):{5}}\t{dict(row['Speech_id Date Person Party'.split()])}")
    print('All nodes of graph successfully added!')

    # ----- Add edges
    print('\nAdding edges for graph...')
    for i, row_i in DATA.iterrows():
        # Extract name, topic and sentiment of person1
        p_i = row_i['Person']
        t_i = row_i['Topic']
        s_i = row_i['Senti_comp']

        for j, row_j in DATA[:i+1].iterrows():
            # Extract name, topic and sentiment of person2
            p_j = row_j['Person']
            t_j = row_j['Topic']
            s_j = row_j['Senti_comp']

            # Print progress...
            if (i % 50 == 0) and (j % 50 == 0):
                print(f"{i:{5}},{j:{5}} of {len(DATA):{5}}\t{p_i:{20}}{p_j:{20}}\tt_i: {int(t_i)}\tt_j: {int(t_j)}")

            # Both actors cannot be the same person
            # Both actors must have spoken on the same topic
            # Both sentiments towards the topic must be of the same polarity
            if (p_i != p_j) and (t_i == t_j) and (s_i*s_j > 0):
                # Compute sentiment difference
                sentiDiff = abs(s_i - s_j)

                # The sentiment difference must be less than the threshold
                if sentiDiff < SENTIDIFF_THRES:
                    # If there is no edge between both actors, construct an edge.
                    # Otherwise, update attributes of the existing edge.
                    if not G.has_edge(p_i, p_j):
                        agreedSpeeches = {
                            'topic': t_i,
                            'sentiDiff': sentiDiff,
                            'text': pd.DataFrame([row_i, row_j])
                        }
                        G.add_edge(p_i, p_j, weight=1, agreedSpeeches=[agreedSpeeches])
                    else:
                        # Extract data from the already existing edge
                        edgeData = G.get_edge_data(p_i, p_j)

                        # Compute new weight
                        weight_old = edgeData['weight']
                        weight_new = weight_old + 1

                        # Append the new agreedSpeeches dict to the existing ones
                        agreedSpeeches_old = edgeData['agreedSpeeches']
                        agreedSpeeches_new = agreedSpeeches_old + [{
                            'topic': t_i,
                            'sentiDiff': sentiDiff,
                            'text': pd.DataFrame([row_i, row_j])
                        }]

                        # Update information of the edge
                        G.add_edge(p_i, p_j, weight=weight_new, agreedSpeeches=agreedSpeeches_new)
    print('All edges of graph successfully added!')

    # ================================================================================
    # ----- Compute degree of centrality and add as node attribute
    # Centrality has to be normalised to the max possible number of agreements a node can have.
    # This is computed by (number of speeches made by actor) * [(total number of speeches) - (number of speeches made by actor)]
    # G.degree() returns the number of edges adjacent to a node, taking into account the edge weight
    cent = {n: G.degree(n, weight='weight') for n in list(G.node)}
    cent = pd.DataFrame.from_dict(cent, orient='index', columns='degree'.split())

    # Compute number of speeches each actor has made
    actorSpeechCnt = {}
    for n in list(G.node):
        actorSpeechCnt[n] = len(DATA[DATA['Person'] == n])

    # Compute normalised degree of centrality
    cent_norm = {}
    for n in list(G.node):
        cent_max = actorSpeechCnt[n]*(len(DATA) - actorSpeechCnt[n])
        cent_norm[n] = cent['degree'].loc[n]/cent_max
    cent_norm = pd.DataFrame.from_dict(cent_norm, orient='index', columns='centrality'.split())

    # Place normalised centrality in the dataFrame and sort according to it
    cent['centrality'] = cent_norm
    cent.sort_values(by='centrality', ascending=False, inplace=True)

    # Add centrality information to node attribute
    nx.set_node_attributes(G, cent['centrality'], 'centrality')

    # ================================================================================
    # ----- Compute cliques and add clique group number as node attribute
    # Construct a list of the maximal cliques within the network, labelled by clique number
    cliqueList = list(enumerate(nx.find_cliques(G)))

    # For every actor in the network, search all cliques to find which ones the actor belongs to.
    # Return a dict of actors and the corresponding clique numbers that the actor is in.
    cliqueNum = {}
    actors = np.sort(list(G.node))
    for p in actors:
        inClique = []
        for i, clq in cliqueList:
            if p in clq:
                inClique.append(i)
        cliqueNum[p] = inClique

    # Add clique information to node attribute
    nx.set_node_attributes(G, cliqueNum, 'cliques')

    dur = tm.gmtime(tm.perf_counter() - startTime)
    print(f"\nGraph construction complete!")
    print(f"Construction took {dur.tm_sec}s")
    print(f"{len(cliqueList)} cliques found")
    # Print percentage of edges removed by threshold

    # =====================================================================================
    # ----- FOR DEBUGGING
    # # Save results
    # nx.write_gpickle(G, f"{PATH}{TIME_FRAME}/ssm_weightedGraph_{TIME_FRAME}.gpickle")
    # cent.to_csv(f"{PATH}{TIME_FRAME}/ssm_centrality_{TIME_FRAME}.csv")
    # with open(f"{PATH}{TIME_FRAME}/ssm_cliques_{TIME_FRAME}.pickle", "wb") as file:
    #     pickle.dump(cliques, file)
    # ================================================================================

    return G, cent, cliqueList
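As a quick sanity check of the normalisation described in the comments above (an actor's weighted degree is divided by the largest number of agreements that actor could possibly take part in), here is the formula evaluated with made-up numbers:

# Made-up numbers: an actor with 10 speeches in a corpus of 200 speeches can
# agree with each of the 190 speeches made by others, once per own speech.
n_actor, n_total = 10, 200
cent_max = n_actor * (n_total - n_actor)
print(cent_max)  # 1900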
g.add_nodes_from(words)  # add the words we identified as nodes

for i in range(0, cooc_upper.shape[0]):
    for j in range(0, cooc_upper.shape[1]):
        if cooc_upper[i, j] == 1:
            # add an edge only if the two words co-occur
            g.add_edge(words[i], words[j])

# Remove nodes with no edges
degree = g.degree()
for n in g.nodes():
    if degree[n] == 0:
        g.remove_node(n)

# Find maximal cliques and visualise
coords = nx.spring_layout(g)
# remove "len(clique) > 2" if you're interested in maximal cliques with only 2 nodes
cliques = [clique for clique in nx.find_cliques(g) if len(clique) > 2]

# draw each clique as a subgraph, reusing the layout of the full graph
for clique in cliques:
    print "Clique to appear: ", clique
    H = g.subgraph(clique)
    col = colors.next()  # `colors` is assumed to be an iterator of colours (e.g. itertools.cycle) defined earlier
    nx.draw_networkx(H, pos=coords, node_color=col, with_labels=True)
    plt.show()
    plt.clf()
def test_find_cliques1(self):
    cl = list(nx.find_cliques(self.G))
    rcl = nx.find_cliques_recursive(self.G)
    expected = [[2, 6, 1, 3], [2, 6, 4], [5, 4, 7], [8, 9], [10, 11]]
    assert_equal(sorted(map(sorted, cl)), sorted(map(sorted, rcl)))
    assert_equal(sorted(map(sorted, cl)), sorted(map(sorted, expected)))
def k_clique_communities(G, k, cliques=None):
    """Find k-clique communities in graph using the percolation method.

    A k-clique community is the union of all cliques of size k that
    can be reached through adjacent (sharing k-1 nodes) k-cliques.

    Parameters
    ----------
    G : NetworkX graph

    k : int
       Size of smallest clique

    cliques: list or generator
       Precomputed cliques (use networkx.find_cliques(G))

    Returns
    -------
    Yields sets of nodes, one for each k-clique community.

    Examples
    --------
    >>> G = nx.complete_graph(5)
    >>> K5 = nx.convert_node_labels_to_integers(G,first_label=2)
    >>> G.add_edges_from(K5.edges())
    >>> c = list(nx.k_clique_communities(G, 4))
    >>> list(c[0])
    [0, 1, 2, 3, 4, 5, 6]
    >>> list(nx.k_clique_communities(G, 6))
    []

    References
    ----------
    .. [1] Gergely Palla, Imre Derényi, Illés Farkas, and Tamás Vicsek,
       Uncovering the overlapping community structure of complex networks
       in nature and society Nature 435, 814-818, 2005,
       doi:10.1038/nature03607
    """
    if k < 2:
        raise nx.NetworkXError("k=%d, k must be greater than 1." % k)
    if cliques is None:
        cliques = nx.find_cliques(G)
    cliques = [frozenset(c) for c in cliques if len(c) >= k]

    # First index which nodes are in which cliques
    membership_dict = defaultdict(list)
    for clique in cliques:
        for node in clique:
            membership_dict[node].append(clique)

    # For each clique, see which adjacent cliques percolate
    perc_graph = nx.Graph()
    perc_graph.add_nodes_from(cliques)
    for clique in cliques:
        for adj_clique in _get_adjacent_cliques(clique, membership_dict):
            if len(clique.intersection(adj_clique)) >= (k - 1):
                perc_graph.add_edge(clique, adj_clique)

    # Connected components of clique graph with perc edges
    # are the percolated cliques
    for component in nx.connected_components(perc_graph):
        yield (frozenset.union(*component))
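The function above relies on a helper _get_adjacent_cliques that is not included in this excerpt. Based on how it is called, it looks up, through the node-to-clique membership index, every other clique sharing at least one node with the given clique; a minimal sketch consistent with that usage:

def _get_adjacent_cliques(clique, membership_dict):
    """Return all cliques (other than `clique` itself) that share at least one node with it."""
    adjacent_cliques = set()
    for n in clique:
        for adj_clique in membership_dict[n]:
            if clique != adj_clique:
                adjacent_cliques.add(adj_clique)
    return adjacent_cliques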
def test_directed(self):
    # find_cliques is only defined for undirected graphs; requesting the
    # cliques of a DiGraph should raise NetworkXNotImplemented.
    assert_raises(nx.NetworkXNotImplemented,
                  lambda: list(nx.find_cliques(nx.DiGraph())))
topology = iu.module_from_spec(spec)
spec.loader.exec_module(topology)

def betti(C):
    return topology.betti_bin_cpp(C, worker="../cpp/UV/rank")
    #return topology.betti_bin(C)

import networkx as nx
import numpy as np
import itertools

G = nx.fast_gnp_random_graph(15, 0.6, seed=0)

C = list(nx.find_cliques(G))
H = nx.make_max_clique_graph(G)

assert (H.number_of_nodes() == len(C))

for (a, b) in H.edges():
    assert (len(set(C[a]).intersection(C[b])))

print("Full:")
print("G:", betti(nx.find_cliques(G)))
print("H:", betti(nx.find_cliques(H)))

H.remove_nodes_from([n for n in H.nodes() if (len(C[n]) <= 3)])

print("After removal:")
print("G:", betti(nx.find_cliques(G)))
print("H:", betti(nx.find_cliques(H)))