Example #1
def Cliques(G):
    """
    Return, for each node in G, the list of maximal cliques that node belongs to.
    """
    all_cliques = list(nx.find_cliques(G))
    cliques = []
    for i in G.nodes():
        cliques.append(nx.cliques_containing_node(G, i, cliques=all_cliques))
    return cliques
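A minimal usage sketch on a hypothetical toy graph; it builds the same per-node lookup directly from a single nx.find_cliques pass, which also avoids the cliques_containing_node helper (deprecated in recent networkx releases):

import networkx as nx

G = nx.Graph([(1, 2), (2, 3), (1, 3), (3, 4)])  # hypothetical toy graph
all_cliques = list(nx.find_cliques(G))
# index the maximal cliques by the nodes they contain
cliques_per_node = {n: [c for c in all_cliques if n in c] for n in G.nodes()}
print(cliques_per_node[3])  # e.g. [[1, 2, 3], [3, 4]] (ordering may vary)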
Example #2
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--graph", type=str, help="graph file")
    arg = ap.parse_args()
    result = get_graph(arg.graph)
    cliques = [clique for clique in nx.find_cliques(result) if len(clique) > 2]
    print(cliques)
Example #3
 def test_selfloops(self):
     self.G.add_edge(1,1)
     cl=list(nx.find_cliques(self.G))
     rcl=nx.find_cliques_recursive(self.G)
     assert_equal(sorted(map(sorted,cl)), sorted(map(sorted,rcl)))
     assert_equal(cl,
                  [[2, 6, 1, 3], [2, 6, 4], [5, 4, 7], [8, 9], [10, 11]])
def all_cliques(graph, count):
    """ Now, given a large graph, sample the cliques and test for homogeneity
    Parameters
    ----------
    graph : a networkx graph
    
    Method
    ------
    * creates a mapping from nodes to communities
    * uses networkx to generate several cliques and maps the clique members to
      communities, if the clique has at least 4 members
    """
    pf = open('cliques_within_the_graph.pkl', 'wb')
    
    part = CD.modularity_run(graph)
    cgen = nx.find_cliques(graph)
    found = []
    
    for i in range(count):
        try:
            clump = next(cgen)
            if len(clump) > 2:
                found.append(([part[n] for n in clump], clump))
        except StopIteration:
            pickle.dump( (graph, part, found) , pf)
            pf.close()
            return found
            
    pickle.dump( (graph, part, found) , pf)
    pf.close()
    return found
def main():
    import networkx as nx
    global GENETIC_CODE, AA_SEQ, AA_LENGTH, NUC_LENGTH, HAMMING_DIST
    GENETIC_CODE = {'L': ['CTT', 'CTC', 'CTA', 'CTG', 'TTA', 'TTG'], 'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
                    'S': ['TCT', 'TCC', 'TCA', 'TCG'], 'T': ['ACT', 'ACC', 'ACA', 'ACG']}
    AA_SEQ = 'RLRLRLRLRL'
    # AA_SEQ = 'RRRR'
    HAMMING_DIST = 8
    AA_LENGTH = len(AA_SEQ)
    NUC_LENGTH = 3*AA_LENGTH
    current_opts = [nucs for nucs in GENETIC_CODE[AA_SEQ[0]]]
    for aa in range(AA_LENGTH-1):
        new_opts = []
        for opt in current_opts:
            for nucs in GENETIC_CODE[AA_SEQ[aa + 1]]:  # codons for the next residue
                new_opts.append(opt+nucs)
        current_opts = new_opts[:]

    print('there are %i options' % len(current_opts))

    G = nx.Graph()
    [G.add_node(a) for a in current_opts]
    [G.add_edge(a, b) for a in current_opts for b in current_opts if different_enough(a, b)]

    print('found %i edges' % len(G.edges()))

    cliques = [a for a in nx.find_cliques(G)]

    print('now printing cliques with at least 10000 members')
    for clique in cliques:
        if len(clique) >= 10000:
            print(clique)
def getCliques(g):
    netscience_graph = g
    t0 = datetime.datetime.now()
    cliques = list(nx.find_cliques(netscience_graph))
    print(datetime.datetime.now() - t0, ' elapsed time.')
    print(len(cliques))
    print(cliques[0])
Example #7
def compareGraphs(G, H, fields=('element',), tolerance=0.5, returnmatching=False):
    # Comparison algorithm based on:
    # "Chemoisosterism in the Proteome", X. Jalencas, J. Mestres, JCIM 2013
    # http://pubs.acs.org/doi/full/10.1021/ci3002974
    if G == H:
        if returnmatching:
            return 1, len(G), [(x, x) for x in G.nodes()]
        else:
            return 1

    if len(G.edges()) == 0 or len(H.edges()) == 0:
        if returnmatching:
            return 0, 0, []
        else:
            return 0

    Gprod = createProductGraph(G, H, tolerance, fields)

    # Calculate the maximal cliques and return the length of the largest one
    maxcliques = np.array(list(nx.find_cliques(Gprod)))
    cllen = np.array([len(x) for x in maxcliques])
    score = (cllen.max() / max(len(G.nodes()), len(H.nodes())))

    if returnmatching:
        return score, cllen.max(), maxcliques[cllen.argmax()]
    else:
        return score
Example #8
	def createR(self): 
		clases = set() 
		cliques = 0
		for q in  nx.find_cliques(self.G):
			if (len(q) <3) or (len(q)>6) : continue
			cliques += 1
			tmp_list_sign = self.getSetSignAASeq(q)['list_signature']
			self.how_many_signatures[tuple(tmp_list_sign)] += 1	
			L = ','.join(str(x) for x in sorted(tmp_list_sign))
			self.osisDictString[L].add(','.join(q))
			self.osisDict[L].add(tuple(q))
			for i in q:
				self.osisDictElements[L].add(i)

			rcname =  hash(tuple(q))
			self.metainfo_node[rcname] = (set(q),tmp_list_sign)
			self.HG.add_node(rcname)
			for hn in self.HG.nodes():
				if self.metainfo_node[hn][0] & self.metainfo_node[rcname][0]:
					self.HG.add_edge(hn,rcname)

		classindex = 0
		for K in range(3, 7):
			for signa in rcd.setSignatures[K]:
				self.RCCvector[classindex] = self.how_many_signatures[tuple(signa)]
				for n in self.HG.nodes():
					if self.metainfo_node[n][1] != signa: continue
					self.RCCvector2[classindex] += self.HG.degree(n)
				classindex += 1
 def f21(self):
     start = 0
     clique_list = list(nx.find_cliques(self.G))
     res = len(clique_list)
     stop = 0
     # self.feature_time.append(stop - start)
     return res
def get_percolated_cliques(G, k):
    percolation_graph = nx.Graph()
    cliques = [frozenset(c) for c in nx.find_cliques(G) if len(c) >= k]
    percolation_graph.add_nodes_from(cliques)

    # First index which nodes are in which cliques
    membership_dict = defaultdict(list)
    for clique in cliques:
        for node in clique:
            membership_dict[node].append(clique)

    def get_adjacent_cliques(clique, membership_dict):
        adjacent_cliques = set()
        for n in clique:
            for adj_clique in membership_dict[n]:
                if clique != adj_clique:
                    adjacent_cliques.add(adj_clique)
        return adjacent_cliques

    # For each clique, see which adjacent cliques percolate
    for clique in cliques:
        for adj_clique in get_adjacent_cliques(clique, membership_dict):
            if len(clique.intersection(adj_clique)) >= (k - 1):
                percolation_graph.add_edge(clique, adj_clique)

    print('percolation graph nodes:', percolation_graph.nodes())
    print('percolation graph edges:', percolation_graph.edges())

    # Connected components of clique graph with perc edges
    # are the percolated cliques
    for component in nx.connected_components(percolation_graph):
        yield (frozenset.union(*component))
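A hedged usage sketch of the generator above on a hypothetical toy graph (assuming the module's own imports, i.e. networkx as nx and defaultdict from collections): two triangles sharing an edge percolate into one k=3 community, while a detached triangle stays on its own.

G = nx.Graph([(1, 2), (2, 3), (1, 3), (2, 4), (3, 4), (5, 6), (6, 7), (5, 7)])
for community in get_percolated_cliques(G, k=3):
    print(sorted(community))   # expected: [1, 2, 3, 4] and [5, 6, 7]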
Example #11
def plotUnweightedCommunities(G, k_clique, n_nodes,iw):

	cls = nx.find_cliques(G)
	communities = list(nx.k_clique_communities(G,k_clique ,cliques = cls))

	print(len(communities))

	pos=nx.graphviz_layout(G) # positions for all nodes


	plt.figure(figsize=(12,12))

	#colors = ["green","yellow","red","blue","pink","orange","gray","brown","black","white","purple","green","yellow","red","blue","pink","orange","gray","brown","black","white","purple"]

#	colors = ["#ff0000","#ff8000","#ffbf00","#ffff00","#bfff00","#80ff00","#40ff00","#00ff00", "#00ff40", "#00ff80", "#00ffbf", "#00ffff", "#00bfff", "#0080ff", "#0040ff", "#0000ff", "#4000ff", "#8000ff", "#bf00ff", "#ff00ff", "#ff00bf", "#ff0080", "#ff0040","#ff0000","#ff8000","#ffbf00","#ffff00","#bfff00","#80ff00","#40ff00","#00ff00", "#00ff40", "#00ff80", "#00ffbf", "#00ffff", "#00bfff", "#0080ff", "#0040ff", "#0000ff", "#4000ff", "#8000ff", "#bf00ff", "#ff00ff", "#ff00bf", "#ff0080", "#ff0040"]

	for i in range(len(communities)):
		nx.draw_networkx_nodes(G,pos,nodelist=list(communities[i]),node_color=colors[i%len(colors)])

	nx.draw_networkx_edges(G,pos,width=0.5)
			# labels
	nx.draw_networkx_labels(G,pos,font_size=10,font_family='sans-serif')

	plt.axis('off')
	plt.savefig("./communities/unweighted_"+"comm_"+"w"+str(iw)+"k"+str(k_clique)+".png") # save as png
	plt.close()
 def calculate_comembership(self, backend=False):
     logging.info("Calculating comembership.")
     
     nodes = self.graph.nodes()
     n = len(nodes)
     if not backend and n > 500:
         raise network_big.NetworkTooBigException(n)
     
     cliques = list(find_cliques(self.graph))
     
     w = {}
     for clique in cliques:
         for node1 in clique:
             for node2 in clique:
                 try:
                     w[node1,node2] += 1
                 except KeyError:
                     w[node1,node2] = 1
                     
     nodes = w.keys()
     comembership = float(0)
     for node1, node2 in nodes:
         if node1 != node2: comembership += w[node1,node2]
         
     num_nodes = len(self.graph.nodes())
     comembership /= num_nodes*(num_nodes-1)
     
     self.measures['comembership'] = comembership
     self.nodesmeasures['comembership'] = None
     self.edgesmeasures['comembership'] = w
	def find_cliques(self,g):
		import networkx as nx
		nodes=g.nodes()

		G=nx.Graph()
		G.add_nodes_from(nodes)
		
		for item1 in nodes:
			for item2 in nodes:

				if not item1==item2:
					if g.number_of_edges(item1, item2):
						G.add_edge(item1, item2)
		cliques=[item for item in list(nx.find_cliques(G)) if len(item)>=3]
	
		checked_clique=[]  # all nodes of a clique must appear together in one sentence
		for cl in cliques:
			flag=False
			for item in self.keywords:
				occur=0
				for word in cl:
					if word in item:
						occur=occur+1

				if occur==len(cl):
					flag=True
			if flag:
				checked_clique.append(cl)
				#print(cl)


		return checked_clique
Example #14
def plotWeightedCommunities(G, W_lim, k_clique, n_nodes):
	for i in range(0,n_nodes):
		for j in range(i,n_nodes):
			if(i!=j):
				if(G[i][j]['weight'] < W_lim):
					G.remove_edge(i,j)

	cls = nx.find_cliques(G)
	communities = list(nx.k_clique_communities(G,k_clique ,cliques = cls))

	print(len(communities))

	pos=nx.graphviz_layout(G) # positions for all nodes


	plt.figure(figsize=(12,12))

	#colors = ["green","yellow","red","blue","pink","orange","gray","brown","black","white","purple","green","yellow","red","blue","pink","orange","gray","brown","black","white","purple"]

	for i in range(len(communities)):
		nx.draw_networkx_nodes(G,pos,nodelist=list(communities[i]),node_color=colors[i])

	nx.draw_networkx_edges(G,pos,width=0.5)
			# labels
	nx.draw_networkx_labels(G,pos,font_size=10,font_family='sans-serif')

	plt.axis('off')
	plt.savefig("comm_w_"+str(W_lim)+"k"+str(k_clique)+".png") # save as png
	plt.close()
def diagonal_bins_to_cliques(args, run_filters, bins):
    G = nx.Graph()
    [G.add_node(a) for a in bins.values()]

    for n1 in G.nodes_iter():
        for n2 in G.nodes_iter():
            if n1.name == n2.name:
                continue

            docs_diff = docs_differ_symmetry(n1.doc_AASeq, n2.doc_AASeq, '1ohz')
            if switches_differ(args, n1.coh_switch, n2.coh_switch) and docs_diff >= args['doc_diff_by']:
                G.add_edge(n1, n2)

            else:
                print('\n')
                print(n1.coh_switch)
                print(n2.coh_switch)
                print(n1.doc_switch)
                print(n2.doc_switch, docs_diff)


    cliques = [a for a in nx.find_cliques(G)]
    max_len = max([len(a) for a in cliques])
    max_cliques = [a for a in cliques if len(a) == max_len]
    print('there are %i cliques with %i structures in each for diff_by=%i doc_diff_by=%i' % (len(max_cliques), max_len, args['diff_by'], args['doc_diff_by']))
    return max_cliques
Example #16
def find_best_clique(sim_mat, size):
	G = nx.Graph()
	for x in range(len(sim_mat)):
		G.add_node(x)
	edges = get_sorted_edges(sim_mat)
	x = 0
	thresh = 0.05
	while thresh <= 1:
		while x < len(edges) and edges[x][2] <= thresh:
			G.add_edge(edges[x][0], edges[x][1])
			x += 1
		max_cliques = nx.find_cliques(G)

		# bucket sort
		by_size = collections.defaultdict(list)
		for clique in max_cliques:
			by_size[len(clique)].append(clique)

		biggest = max(by_size.keys())
		if biggest >= size:
			# do tie breaking
			cliques = by_size[biggest]
			best_clique = None
			best_score = 1000000
			for clique in cliques:
				score = max_weight_clique(sim_mat, clique)
				if score < best_score:
					best_score = score
					best_clique = clique
			return best_clique
		thresh += 0.05
Example #17
 def create_clique_list(self):
     '''
     a method to create a list of cliques
     Parameters:
         None
     Returns:
         cliques: the list of cliques (list)
         chromatic: the chromatic number (int)
         l_index: the largest clique index (int)
     '''
     g = self.graph.copy()
     chromatic = 0
     l_index = 0
     index = 0
     cliques = []
     while len(g.nodes()) > 0:
         largest = 0
         for clique in nx.find_cliques(g):
             if len(clique) > largest:
                 largest = len(clique)
                 largest_clique = clique
         clique = []
         for node in largest_clique:
             g.remove_node(node)
             clique.append([node])
         if len(clique) > chromatic:
             chromatic = len(clique)
             largest = clique
             l_index = index
         cliques.append(clique)
         index += 1
     return cliques, chromatic, l_index
Example #18
def find_cliques(G, min_size):
    """Find all cliques in G above a given size.

    If a node is part of a larger clique, it is deleted from the smaller ones.

    Returns
    -------
    dict
        Mapping nodes to clique ID
    """
    cliques = []
    for K in nx.find_cliques(G):
        if len(K) >= min_size:
            cliques.append(set(K))
    cliques.sort(reverse=True, key=lambda x: len(x))
    L = set()
    for K in cliques:
        K -= L
        L |= K
    cliques = [J for J in cliques if len(J) >= min_size]
    node_to_clique = {}
    for i, K in enumerate(cliques):
        for node in K:
            if node not in node_to_clique:
                node_to_clique[node] = i
    return node_to_clique
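A hedged usage sketch on a hypothetical toy graph: the 4-clique {1, 2, 3, 4} absorbs the overlapping triangle {3, 4, 5}, so node 5 ends up without a clique ID.

import networkx as nx

G = nx.Graph()
G.add_edges_from([(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4), (3, 5), (4, 5)])
print(find_cliques(G, min_size=3))   # expected: {1: 0, 2: 0, 3: 0, 4: 0}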
Example #19
    def get_percolation_graph():
        percolation_graph = nx.Graph()
        cliques = [frozenset(c) for c in nx.find_cliques(G) if len(c) >= k]
        print('first max cliques:', cliques)

        percolation_graph.add_nodes_from(cliques)

        # First index which nodes are in which cliques
        membership_dict = defaultdict(list)
        for clique in cliques:
            for node in clique:
                membership_dict[node].append(clique)

        # For each clique, see which adjacent cliques percolate
        for clique in cliques:
            def get_adjacent_cliques(clique, membership_dict):
                adjacent_cliques = set()
                for n in clique:
                    for adj_clique in membership_dict[n]:
                        if clique != adj_clique:
                            adjacent_cliques.add(adj_clique)
                return adjacent_cliques

            for adj_clique in get_adjacent_cliques(clique, membership_dict):
                if len(clique.intersection(adj_clique)) >= (k - 1):
                    percolation_graph.add_edge(clique, adj_clique)

        print('\npercolation graph nodes:', percolation_graph.nodes())
        print('percolation graph edges:', percolation_graph.edges())
        return percolation_graph
def find_predicted_cliques():
    """
    :return:[[member1, member2....], ...] all cliques that are completely predicted and are orthogonal
    """
    coh_doc_purples = creat_coh_doc_purples()
    G = nx.Graph()
    all_cohs = list(coh_doc_purples.keys())
    all_docs = list(set([doc for coh in all_cohs for doc in coh_doc_purples[coh].keys()]))
    # [G.add_node((coh, doc)) for coh in all_cohs for doc in all_docs if coh_doc_purples[coh][doc] >= 10]
    for coh in all_cohs:
        for doc in all_docs:
            if doc in coh_doc_purples[coh].keys():
                if coh_doc_purples[coh][doc] >= 10:
                    G.add_node((coh, doc))

    for coh1, doc1 in G.nodes_iter():
        for coh2, doc2 in G.nodes_iter():
            if (coh1, doc1) != (coh2, doc2):
                if doc1 in coh_doc_purples[coh2].keys() and doc2 in coh_doc_purples[coh1].keys():
                    if coh_doc_purples[coh1][doc2] < 10 and coh_doc_purples[coh2][doc1] < 10:
                        G.add_edge((coh1, doc1), (coh2, doc2))

    cliques = list(nx.find_cliques(G))
    print('found the following cliques:')
    for clq in cliques:
        print(clq, len(clq))

    print('the graph had %i nodes, and %i edges' % (G.number_of_nodes(), G.number_of_edges()))
    return cliques
Example #21
 def testConnectNodeList(self):
     node_list = [0, 3, 6, 1, 4, 7, 2, 5, 8]
     self.g.connect_node_list(node_list)
     for clique in nx.find_cliques(self.g._graph):
         expect = len(clique)
         break
     self.assertEqual(expect, self.n**2)
Example #22
def clique_zoeker(graph):

    # Extract all required nodes from the given connections.
    nodes = set([n1 for n1, n2 in graph] + [n2 for n1, n2 in graph])

    # Create a new graph.
    G = nx.Graph()

    # Add all nodes.
    for node in nodes:
        G.add_node(node)

    # Add all edges between nodes (connections).
    for edge in graph:
        G.add_edge(edge[0], edge[1])

    # Find all cliques.
    clique = nx.find_cliques(G)

    # Put all found cliques in a list.
    result = list(clique)

    # Take the largest clique from the list.
    result = max(result, key=len)

    # Return the largest clique.
    return result
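A hedged usage sketch (assuming networkx is imported as nx, as in the other examples): the input is an iterable of 2-tuples, here a hypothetical edge list in which {1, 2, 3} is the largest clique.

connections = [(1, 2), (2, 3), (1, 3), (3, 4)]
print(clique_zoeker(connections))   # expected: [1, 2, 3] (order may vary)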
Example #23
def clique_graph(G, create_using=None, name=None):
    """Create the maximal clique graph of a graph.

    Finds the maximal cliques and treats these as nodes.
    The nodes are connected if they have common members in
    the original graph.  Theory has done a lot with clique
    graphs, but I haven't seen much on maximal clique graphs.

    Notes
    -----
    This should be the same as make_clique_bipartite followed
    by project_up, but it saves all the intermediate steps.
    """
    cliq = list(map(set, nx.find_cliques(G)))
    if create_using:
        B = create_using
        B.clear()
    else:
        B = nx.Graph()
    if name is not None:
        B.name = name

    to_node = lambda cl: tuple(sorted(cl))
    for i, cl in enumerate(cliq):
        u = to_node(cl)
        B.add_node(u)
        for j, other_cl in enumerate(cliq[:i]):
            intersect = cl & other_cl
            if intersect:     # Not empty
                B.add_edge(u, to_node(other_cl), weight=len(intersect))
    return B
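A hedged usage sketch of clique_graph on a hypothetical pair of triangles sharing an edge; the two maximal cliques become nodes joined by an edge weighted by the size of their intersection (exact print formatting depends on the networkx version).

import networkx as nx

G = nx.Graph([(1, 2), (2, 3), (1, 3), (2, 4), (3, 4)])
B = clique_graph(G)
print(list(B.nodes()))            # [(1, 2, 3), (2, 3, 4)]
print(list(B.edges(data=True)))   # [((1, 2, 3), (2, 3, 4), {'weight': 2})]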
def get_ego_cliques(ego):
    ego_cliques_dmp = join(DATA_DIR, 'cliques', 'cliques_%s.zip'%ego)
    if not os.path.exists(ego_cliques_dmp):
        print('Processing cliques: nx.find_cliques, ego:', ego)
        G = load_ego_graph(ego)
        # this can take some time...
        # http://pymotw.com/2/zipfile/
        with zipfile.ZipFile(ego_cliques_dmp, mode='w') as zf:
            fileno = 1
            ego_cliques = []
            for idx, clqs in enumerate(nx.find_cliques(G)):
                if idx%100000==0 and ego_cliques:
                    _write_cliques_file(zf, fileno, ego_cliques)
                    fileno += 1
                    ego_cliques = []
                ego_cliques.append(clqs)
            _write_cliques_file(zf, fileno, ego_cliques)
            ego_cliques = None

    if False: #ego==5881:
        print('In get_ego_cliques, skipping ego', ego)
    else:
        print('Loading cliques for ego:', ego)
        with zipfile.ZipFile(ego_cliques_dmp, mode='r') as zf:
            for f in zf.namelist():
                cliques_in_file = json.loads(zf.read(f))
                for clique in cliques_in_file:
                    yield clique 
Example #25
def __pes_to_bp_gen_conds_pre_clique_based (es, unf, ev_tab, pre_tab, indep,split_conflicts=False) :
    for e in es.events :
        # for all events in e.post, build graph whose edges are
        # the dependence relation
        g = networkx.Graph ()
        g.add_nodes_from (e.post)
        for e1 in e.post :
            for e2 in e.post :
                if e1 != e2 and not indep[e1.label, e2.label] :
                    g.add_edge (e1, e2)
        # for every clique, generate one condition
        for clique in networkx.find_cliques (g) :
            # remove events for which there is already condition
            if not split_conflicts:
                for ep in [ep for ep in clique if (e, ep) in pre_tab] :
                    clique.remove (ep)
            if len (clique) == 0 : continue
            unfpostevs = [ev_tab[ep] for ep in clique]
            # add the condition
            if split_conflicts:
                for unfpost in unfpostevs:
                    c = unf.cond_add (None, [ev_tab[e]], [unfpost])
            else:
                c = unf.cond_add (None, [ev_tab[e]], unfpostevs)
            for ep in clique :
                pre_tab[e, ep] = c
        # events with empty preset will never occurr in previous
        # search, deal with them separately
        if len (e.pre) == 0 :
            if (None, e) not in pre_tab :
                c = unf.cond_add (None, [], [ev_tab[e]])
                pre_tab[None, e] = c
    return pre_tab
Example #26
 def find_foundations(self, cache = True):
     if cache and isinstance(self._foundations, list):
         return self._foundations
     foundations = list(nx.find_cliques(self))
     foundations = self._reduce_cliques(foundations)
     self._foundations = foundations
     return self._foundations
Example #27
def nx_cliques(ppis, min_len=3, min_weight=0):
    G = nx.Graph()
    G.add_weighted_edges_from([p[:3] for p in ppis])
    qs = [set(c) for c in nx.find_cliques(G) if len(c) >= min_len]
    if min_weight:
        qs = [q for q in qs if avg_weight(G,q) > min_weight]
    return qs
Example #28
def find_disjoint_sets(found_sets):
    # uses python graph data structure in which each node is a set
    # edges are created between nodes if the nodes are disjoint sets
    # the maximum clique algorithm is used to calculate the largest collection
    # of disjoint sets
    # initialize graph
    graph = nx.Graph()
    # add all sets as nodes in the graph
    for i in range(len(found_sets)):
        graph.add_node(found_sets[i])
    # iterate through each pair of nodes and add edges between disjoint sets
    for node1 in graph.nodes():
        for node2 in graph.nodes():
            if node1 == node2:
                continue
            if node2 in graph.neighbors(node1):
                continue
            else:
                if is_disjoint(node1, node2):
                    graph.add_edge(node1, node2)
                # use find_cliques function generator to find the max cliques
    max_clique = []
    for clique in nx.find_cliques(graph):
        if len(max_clique) < len(clique):
            max_clique = clique

    return max_clique
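A hedged usage sketch; is_disjoint is a hypothetical stand-in here (the original module defines its own), and the sets are passed as hashable frozensets so they can serve as graph nodes.

def is_disjoint(a, b):
    # hypothetical helper: two sets are disjoint if they share no elements
    return len(a & b) == 0

found_sets = [frozenset({1, 2}), frozenset({3, 4}), frozenset({2, 3}), frozenset({5})]
print(find_disjoint_sets(found_sets))
# expected: a largest collection of mutually disjoint sets, e.g.
# [frozenset({1, 2}), frozenset({3, 4}), frozenset({5})]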
Example #29
 def __init__(self, points, epsilon, labels=None, distfcn=distance.euclidean):
     self.pts = points
     self.labels = range(len(self.pts)) if labels is None or len(labels) != len(self.pts) else labels
     self.epsilon = epsilon
     self.distfcn = distfcn
     self.network = self.construct_network(self.pts, self.labels, self.epsilon, self.distfcn)
     self.import_simplices(map(tuple, list(nx.find_cliques(self.network))))
Example #30
def collapsible_patterns(alms, G, context, ref='pcogids', verbose=False,
        use_taxa=["Old_Burmese", "Burmese", "Written_Burmese",
        "Rangoon", "Achang_Longchuan", "Xiandao", "Lashi", "Atsi", "Bola", "Maru"]):
    if [x for x in use_taxa if x not in alms.taxa]:
        raise ValueError("Your list of taxa contains taxa not in the wordlist.")
    patterns = defaultdict(list)
    for node, data in G.nodes(data=True):
        concept = alms[alms.msa[ref][int(node)]['ID'][0], 'concept']
        words = []
        msa = alms.msa[ref][int(node)]
        for i, t in enumerate(use_taxa):
            if t in msa['taxa']:
                words += [''.join(msa['seqs'][msa['taxa'].index(t)]).replace('-','')]
            else:
                words += ['Ø']
        patterns[data['clique']] += [(node, concept, words)]
    collapsible = defaultdict(list)
    for pattern, vals in patterns.items():
        g = nx.Graph()
        for n, c, words in vals:
            collapsible[pattern, tuple(words)] += [(n, c)]
            g.add_node(n, c=c, w=words)
        for (n1, c1, words1), (n2, c2, words2) in combinations(vals, r=2):
            if compatible_columns(words1, words2, gap='Ø') >= 1:
                g.add_edge(n1, n2)
        for clique in nx.find_cliques(g):
            if len(clique) > 1:
                for n in clique:
                    print(pattern, '{0:4}'.format(n), 
                            '{0:22}'.format(g.node[n]['c'][:21]),
                            '   '.join(['{0:6}'.format(x) for x in
                                g.node[n]['w']]))
                print('--')
bet = nx.betweenness_centrality(facebook_net)
bet_sorted = sorted(bet.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 betweenness centrality (node, centrality): ", bet_sorted[0:9])
#Eigenvector centrality top 10
eig = nx.eigenvector_centrality(facebook_net)
eig_sorted = sorted(eig.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 eigenvector centrality (node, centrality): ", eig_sorted[0:9])
#Pagerank centrality top 10
pag = nx.pagerank(facebook_net)
pag_sorted = sorted(pag.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 pagerank centrality (node, centrality): ", pag_sorted[0:9])

#Trim network to only show nodes with more than 1 connection
facebook_net_trimmed = facebook_net.copy()
for n in list(facebook_net_trimmed.nodes()):
    if deg[n] < 2:
        facebook_net_trimmed.remove_node(n)

#View all cliques
cliques = list(nx.find_cliques(facebook_net_trimmed))
print("Cliques:")
for c in cliques:
    print(c)

#Export data for use in Gephi
nx.write_gexf(facebook_net, "facebook_network.gexf")

#Plot Facebook network
nx.draw_random(facebook_net)
plt.show()
Example #32
 def test_find_cliques2(self):
     hcl = list(nx.find_cliques(self.H))
     assert_equal(sorted(map(sorted, hcl)),
                  [[1, 2], [1, 4, 5, 6], [2, 3], [3, 4, 6]])
data_path = '../../data/clique/simple' + ('_single'
                                          if single_place else '') + '.csv'
fig_path = '../../results/clique/simple' + ('_single'
                                            if single_place else '') + '.png'

if fresh_data:
    # Import data and generate network
    _, edges = load_airport_and_route(deep_load=True)
    netx = from_edgelist(edges)
    N = number_of_nodes(netx)
    budget = N * balls_per_node
    net = network(N, graph=netx)
    print('Data import and network generated')

    # Find and sort cliques
    cliques = sorted(find_cliques(netx), key=lambda c: len(c), reverse=True)

    trial_infections = []
    num_cliques = linspace(1, 120, 40).astype(int)
    for num in num_cliques:
        simple_cliques(net,
                       num,
                       budget,
                       cliques=cliques,
                       single_place=single_place)
        trial = run_polya(net, trials=2)
        trial_infections.append(trial[len(trial) - 1])
else:
    trial_infections, num_cliques = load_csv_col(data_path,
                                                 with_headers=True,
                                                 parse=float,
print "There are ", len(components), "component"
for component in components:
    print "length:", len(component)

# <markdowncell>

# Plotting Ego Graph of a person.

# <codecell>

name = "Abhinav Pandey"
ego = nx.ego_graph(g, name, radius=1)
nx.draw(ego)
plot.show()
print "Clustering:", nx.clustering(g, name)

# <markdowncell>

# Calculating cliques in the graph. These are closed fraternities, like a terrorist organization.

# <codecell>

clique = nx.find_cliques(g)
clique = list(clique)
sorted_clique = sorted(clique, key=lambda x: len(x))
sorted_clique[-1]

# <codecell>

# <codecell>
Example #35
def fuzzy(threshold, matrix, taxa, method='upgma', revert=False):
    """
    Create fuzzy cluster of a given distance matrix.

    Parameters
    ----------
    threshold : float
        The threshold that shall be used for the basic clustering of the data.

    matrix : list
        A two-dimensional list containing the distances.

    taxa : list
        A list containing the names of all taxa corresponding to the distances
        in the matrix.

    method : { "upgma", "single", "complete" } (default="upgma")
        Select the method for the flat cluster analysis.

    revert : bool (default=False)
        Specify whether a reverted dictionary should be returned.

    Returns
    -------
    cluster : dict
        A dictionary with cluster-IDs as keys and a list as value, containing
        the taxa that are assigned to a given cluster-ID.

    Examples
    --------
    The function is automatically imported along with LingPy.

    >>> from lingpy import *
    >>> from lingpy.algorithm import squareform

    Create a list of arbitrary taxa.

    >>> taxa = ['German','Swedish','Icelandic','English','Dutch']

    Create an arbitrary distance matrix.

    >>> matrix = squareform([0.5,0.67,0.8,0.2,0.4,0.7,0.6,0.8,0.8,0.3])
    >>> matrix
    [[0.0, 0.5, 0.67, 0.8, 0.2],
     [0.5, 0.0, 0.4, 0.7, 0.6],
     [0.67, 0.4, 0.0, 0.8, 0.8],
     [0.8, 0.7, 0.8, 0.0, 0.3],
     [0.2, 0.6, 0.8, 0.3, 0.0]]

    Carry out the fuzzy flat cluster analysis.

    >>> fuzzy(0.5,matrix,taxa)
    {1: ['Swedish', 'Icelandic'], 2: ['Dutch', 'German'], 3: ['Dutch', 'English']}

    Notes
    -----
    This is a very simple fuzzy clustering algorithm. It does little more than
    remove each taxon successively from the matrix, flat-cluster the remaining
    taxa with the corresponding threshold, and then return a combined
    "consensus" cluster in which taxa may be assigned to multiple clusters.

    See also
    --------
    link_clustering

    """
    g = nx.Graph()

    for taxon in taxa:
        g.add_node(taxon)

    for idx, taxon in enumerate(taxa):
        new_matrix = []
        for i, line in enumerate(matrix):
            for j, cell in enumerate(line):
                if i < j and i != idx and j != idx:
                    new_matrix += [cell]
        new_matrix = misc.squareform(new_matrix)

        clusters = cluster.flat_cluster(
            method, threshold, new_matrix, [t for t in taxa if t != taxon])

        for clr in clusters:
            for tA, tB in util.combinations2(clusters[clr]):
                if not g.has_edge(tA, tB):
                    g.add_edge(tA, tB, weight=1)
                else:
                    g[tA][tB]['weight'] += 1
    out = {i + 1: c for i, c in enumerate(nx.find_cliques(g))}

    if revert:
        new_out = defaultdict(list)
        for key, val in out.items():
            for v in val:
                new_out[v].append(key)
        return new_out

    return out
Example #36
N = []
GT = nx.DiGraph()
R = []
seen = set()
rhs_dict = {}

for key in sorted(T.keys()):
    # backwards version
    print "================================================"
    print "KEY:", key, T[key]
    print "================================================"
    subgraph = nx.Graph(T[key])
    # for clique in nx.find_cliques(subgraph):
    while True:
        clique = []
        for x in nx.find_cliques(subgraph):
            if len(x) > 1:
                clique = x
                break
        if len(clique) == 0:
            break
        sg = G.subgraph(clique)
        clique_str = ','.join(str(y) for y in sorted(clique))
        N.append(clique_str)
        G.remove_edges_from(sg.edges())
        subgraph.remove_edges_from(sg.edges())
        subgraph.number_of_edges()

        # of the edges I just removed.. did I create any singletons that need to be grammarred
        singletons = []
        for n in clique:
Example #37
 def test_directed(self):
     with pytest.raises(nx.NetworkXNotImplemented):
         next(nx.find_cliques(nx.DiGraph()))
Example #38
features = pd.read_csv('../data/speaq_results/features.csv').iloc[:, 1:]

MAX_CLIQUES = 100000
PEARSON_CORRELATION_THRESHOLDS = np.arange(.5, .95, .05)
MINIMUM_PERCENTAGE_OF_PEAKS_MATCHED_THRESHOLDS = np.arange(.3, .9, .1)

results = []
peak_db = generate_db(data_dir)
for pearson_thresh in PEARSON_CORRELATION_THRESHOLDS:

    corr_mat = features.corr('spearman')

    g = nx.Graph(corr_mat > pearson_thresh)
    cliqs = []

    for idx, cliq in enumerate(nx.find_cliques(g)):
        if idx > MAX_CLIQUES:
            raise Exception(f"More than {MAX_CLIQUES} cliques found")

        cliqs.append(cliq)
    logger.info(
        f"pearson_thresh: {pearson_thresh:.2f} -- identified {len(cliqs)} cliques"
    )

    for missing_thresh in MINIMUM_PERCENTAGE_OF_PEAKS_MATCHED_THRESHOLDS:
        found = False
        identified_metabolites = {}
        for idx, cliq in enumerate(cliqs):
            res = peak_db.query_n(peakset2ppm(cliq, filled),
                                  missing_thresh=missing_thresh)
            for qs in res:
    ###Disparity calculation####
    disp_c = disparity(imgL_c,imgR_c)
    disp_p = disparity(imgL_p,imgR_p)
    
    ###finding 3D coord ###
    tri_c = triangulation(disp_c,data[:,2:])
    tri_p = triangulation(disp_p,data[:,:2])
    
    #finding index of inliers
    th = .2
    dist_c = scipy.spatial.distance.cdist(tri_c, tri_c, 'sqeuclidean')
    dist_p = scipy.spatial.distance.cdist(tri_p, tri_p, 'sqeuclidean')

    mask = (abs(dist_c-dist_p) < th).astype('uint8')
    G = nx.from_numpy_matrix(mask)        
    list_cliq = list(find_cliques(G))
    length = np.asarray([len(i) for i in list_cliq])
    max_cliq_node = list_cliq[np.argmax(length)]
    
    ####inliers####
    world_c_h = tri_c[max_cliq_node]
    world_p_h = tri_p[max_cliq_node]
    
    img_c = data[:,2:][max_cliq_node]
    img_p = data[:,:2][max_cliq_node]
    
    img_p_h = np.concatenate((img_p,np.ones((len(img_p),1))),axis=1)
    img_c_h = np.concatenate((img_c,np.ones((len(img_c),1))),axis=1)
    
    print(img_c_h.shape)
    ########feeding in random idx######
Example #40
'''
Finding cliques (I)

You're now going to practice finding cliques in G. Recall that cliques are "groups of nodes that are fully connected to one another", while a maximal clique is a clique that cannot be extended by adding another node in the graph.
Instructions
100xp

    Count the number of maximal cliques present in the graph and print it.
        Use the nx.find_cliques() function of G to find the maximal cliques.
        The nx.find_cliques() function returns a generator object. To count the number of maximal cliques, you need to first convert it to a list with list() and then use the len() function. Place this inside a print() function to print it.

    Take Hint (-30xp)
'''
# Calculate the maximal cliques in G: cliques
cliques = nx.find_cliques(G)

# Count and print the number of maximal cliques in G
print(len(list(cliques)))


'''
Finding cliques (II)

Great work! Let's continue by finding a particular maximal clique, and then plotting that clique.
Instructions
100xp

    Find the author(s) that are part of the largest maximal clique, and plot the subgraph of that/one of those clique(s) using a CircosPlot. To do this:
        Use the nx.find_cliques() function to calculate the maximal cliques in G. Place this within the provided sorted() function to calculate the largest maximal clique.
        Create the subgraph consisting of the largest maximal clique using the .subgraph() method and largest_clique.
'''
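A hedged sketch completing the second exercise above; CircosPlot is assumed to come from the nxviz package used in the course, so treat that import and its exact API as an assumption.

import matplotlib.pyplot as plt
from nxviz import CircosPlot

# Identify the largest maximal clique: largest_clique
largest_clique = sorted(nx.find_cliques(G), key=lambda x: len(x))[-1]

# Create the subgraph of the largest maximal clique: G_lc
G_lc = G.subgraph(largest_clique)

# Draw the subgraph as a CircosPlot
c = CircosPlot(G_lc)
c.draw()
plt.show()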
Example #41
    def to_junction_tree(self):
        """
        Creates a junction tree (or clique tree) for a given markov model.

        For a given markov model (H) a junction tree (G) is a graph
        1. where each node in G corresponds to a maximal clique in H
        2. each sepset in G separates the variables strictly on one side of the
           edge from those on the other.

        Examples
        --------
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> junction_tree = mm.to_junction_tree()
        """
        from pgmpy.models import JunctionTree

        # Check whether the model is valid or not
        self.check_model()

        # Triangulate the graph to make it chordal
        triangulated_graph = self.triangulate()

        # Find maximal cliques in the chordal graph
        cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

        # If there is only 1 clique, then the junction tree formed is just a
        # clique tree with that single clique as the node
        if len(cliques) == 1:
            clique_trees = JunctionTree()
            clique_trees.add_node(cliques[0])

        # Else if the number of cliques is more than 1 then create a complete
        # graph with all the cliques as nodes and weight of the edges being
        # the length of sepset between two cliques
        elif len(cliques) >= 2:
            complete_graph = UndirectedGraph()
            edges = list(itertools.combinations(cliques, 2))
            weights = list(
                map(lambda x: len(set(x[0]).intersection(set(x[1]))), edges))
            for edge, weight in zip(edges, weights):
                complete_graph.add_edge(*edge, weight=-weight)

            # Create clique trees by minimum (or maximum) spanning tree method
            clique_trees = JunctionTree(
                nx.minimum_spanning_tree(complete_graph).edges())

        # Check whether the factors are defined for all the random variables or not
        all_vars = itertools.chain(
            *[factor.scope() for factor in self.factors])
        if set(all_vars) != set(self.nodes()):
            raise ValueError(
                'DiscreteFactor for all the random variables not specified')

        # Dictionary stating whether the factor is used to create clique
        # potential or not
        # If false, then it is not used to create any clique potential
        is_used = {factor: False for factor in self.factors}

        for node in clique_trees.nodes():
            clique_factors = []
            for factor in self.factors:
                # If the factor is not used in creating any clique potential as
                # well as has any variable of the given clique in its scope,
                # then use it in creating clique potential
                if not is_used[factor] and set(factor.scope()).issubset(node):
                    clique_factors.append(factor)
                    is_used[factor] = True

            # To compute clique potential, initially set it as unity factor
            var_card = [self.get_cardinality()[x] for x in node]
            clique_potential = DiscreteFactor(node, var_card,
                                              np.ones(np.product(var_card)))
            # multiply it with the factors associated with the variables present
            # in the clique (or node)
            # Thanh Dat
            if len(clique_factors) > 0:
                clique_potential *= factor_product(*clique_factors)
            clique_trees.add_factors(clique_potential)

        if not all(is_used.values()):
            raise ValueError(
                'All the factors were not used to create Junction Tree. '
                'Extra factors are defined.')

        return clique_trees
Example #42
cnt = 0
for i, adj_list in tqdm(enumerate(data.adj_node.values)):
    #for i,adj_list in enumerate(data.adj_node.values):
    edges = [(i, item) for item in adj_list if item != i]
    if edges == []:
        #        if cnt%100000==0:
        #            print("ignore cnts %d" %cnt)
        cnt += 1
    else:
        G.add_edges_from(edges)
print("ignore cnts %d" % cnt)

print('begin train')
cnt = 0
max_clique = np.zeros(data.shape[0])
for clique in tqdm(nx.find_cliques(G)):
    #for clique in nx.enumerate_all_cliques(G):
    if cnt % 100000 == 0:
        print("deal cnts %d" % cnt)
    len_clique = len(clique)
    for item in clique:
        max_clique[item] = max(max_clique[item], len_clique)
    cnt += 1

print("totally max_clique %d" % cnt)

n_train = train.shape[0]
train_clique_data = max_clique[0:n_train]
test_clique_data = max_clique[n_train:]

pd_train_clique = pd.DataFrame(train_clique_data,
import networkx as nx
import matplotlib.pyplot as plt

graph = nx.karate_club_graph()  # Graph about some karate club
# Search for all maximal cliques and report those of size 4 or more
cliques = nx.find_cliques(graph)
print('Cliques of size 4 or more: %s' % [c for c in cliques if len(c) >= 4])

# Uniting cliques into communities
#communities = nx.k_clique_communities(graph,k=4)
#communities_list = [list(c) for c in communities]
#nodes_list = [node for community in communities_list for node in communities]
#print('Finding next communities :%s' % communities_list)
# Concluding
#subgraph = graph.subgraph(nodes_list)

pos = nx.spring_layout(
    graph
)  # use the Fruchterman-Reingold force-directed algorithm to lay out the graph
nx.draw(graph, pos=pos, with_labels=True)
plt.show()
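A hedged sketch of the commented-out community step above, using the import path under which recent networkx releases expose clique-percolation communities:

from networkx.algorithms.community import k_clique_communities

communities = [list(c) for c in k_clique_communities(graph, 4)]
print('k=4 clique communities: %s' % communities)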
Example #44
# pagerank concept
import random

N = 7
G = nx.erdos_renyi_graph(20, 0.2)
for (start, end) in G.edges:
    G.edges[start, end]['weight'] = random.random()
t = nx.pagerank(G)
print(t)
nx.draw(G, with_labels=True)
plt.show()

#%%
# connected component
G = nx.erdos_renyi_graph(20, 0.1)
print(list(nx.find_cliques(G)))
t = sorted(list(nx.connected_components(G)), key=lambda x: len(x))[-1]
print(t)

L = nx.subgraph(G, t)
nx.draw(L)
plt.show()

t = nx.connected_components(G)
i = 1
for n in t:
    for k in n:
        G.nodes[k]['grouping'] = i
        G.nodes[k]['alphabetically'] = i
    i += 1
Example #45
print(percentile(list(deg.values()), 25))  # first quartile (25th percentile)
print(median(list(deg.values())))
print(percentile(list(deg.values()), 75))
print(max(list(deg.values())))

# we can choose to only select the characters that have a degree > 10
# so these are relatively central characters

Gt = G.copy()
dn = nx.degree(Gt)
for n in list(Gt.nodes()):
    if dn[n] <= 10:
        Gt.remove_node(n)
nx.draw_networkx(Gt, node_size=0, edge_color='b', alpha=.2, font_size=12)
show()

# so here we are observing the most relevant characters and their relationships
# we can also find cliques
from networkx import find_cliques
cliques = list(find_cliques(G))
print(max(cliques, key=lambda l: len(l)))
# print the biggest clique
""" other resources
opencv
pandas
scipy
statsmodels
nltk
ipython
"""
Example #46
def maximum_clique_exact_solve_np_hard(G_in):
    max_clique_number = nx.graph_clique_number(G_in)
    cliques = nx.find_cliques(G_in)
    for cl in cliques:
        if len(cl) == max_clique_number:
            return cl
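An equivalent one-pass sketch: taking the longest maximal clique directly avoids the separate graph_clique_number call, which recent networkx releases deprecate.

def maximum_clique_one_pass(G_in):
    # the longest maximal clique is a maximum clique
    return max(nx.find_cliques(G_in), key=len)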
Example #47
                min_mean_q,
                max_mean_q,
                min_std_q,
                max_std_q,
            ))

            c += 1
            end = datetime.now()

    print('times:', end - start)


G = prepare_graph([path + 'train.csv'])
cnt = 0
max_clique = dict()
for clique in nx.find_cliques(G):
    if cnt % 100000 == 0:
        print("deal cnts %d" % cnt)
    len_clique = len(clique)
    for item in clique:
        c = max_clique.get(item, [])
        c.append(len_clique)
        max_clique[item] = c
    cnt += 1

prepare_clique_stats(path + 'train.csv', path + 'train_clique_stats_sep.csv',
                     max_clique)

G = prepare_graph([path + 'train.csv', path + 'test.csv'])
cnt = 0
max_clique = dict()
Example #48
graph theory week4 notebook

"""

import networkx as nx
import pygraphviz as pgv
from nxpd import draw, nxpdParams
nxpdParams['show'] = 'ipynb'

G = nx.Graph()
G.add_edges_from([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e'), ('e', 'a'),
                  ('f', 'a'), ('g', 'b'), ('h', 'c'), ('i', 'd'), ('j', 'e'),
                  ('f', 'h'), ('h', 'j'), ('j', 'g'), ('g', 'i'), ('i', 'f'),
                  ('j', 'i'), ('j', 'd'), ('d', 'g')])

draw(G, layout='circo')

cliques = nx.find_cliques(G)
max_clique = G.nodes()[0]
for c in cliques:
    if (len(c) > len(max_clique)):
        max_clique = c

for v in max_clique:
    G.node[v]['color'] = 'red'
    for u in max_clique:
        if u != v:
            G[u][v]['color'] = 'red'

draw(G, layout='circo')
Example #49
    a[Z] - b[Z])  # function returns manhattan dist

# part 1
largest_radius_bot = max(bots, key=lambda bot: bot[RADIUS])
print(
    'part 1:',
    sum((manhattan(largest_radius_bot, bot) <= largest_radius_bot[RADIUS])
        for bot in bots))

# part 2
# build a graph with edges between overlapping nanobots
graph = nx.Graph()
for bot in bots:
    # two bots overlap if their distance is smaller or equal than the sum of their ranges
    overlaps = [(bot, other) for other in bots
                if manhattan(bot, other) <= bot[RADIUS] + other[RADIUS]]
    graph.add_edges_from(overlaps)

# find sets of overlapping nanobots (i.e. fully-connected sub-graphs)
cliques = list(nx.find_cliques(graph))
cliques_size = [len(c) for c in cliques]
assert len([
    s for s in cliques_size if s == max(cliques_size)
]) == 1  # currently no tie breaking check so make sure it doesn't matter
clique = max(cliques, key=len)  # keep largest clique

# calculate the point in the nanobots' radii which is closest to the origin - greedy strategy - not confident as general
points = [manhattan(ORIGIN, bot) - bot[RADIUS] for bot in clique]
# furthest away point in points needed to get all bots in the clique but closest to origin
print('part 2:', max(points))
Example #50
print("--------------------------------")

N = 100
print("3. Extracting a subgraph on {} random nodes (just a test).".format(N))

G1 = G.subgraph(np.random.choice(G.nodes(), N))
print("Done.")

#print("HACK! Replacing graph by subgraph"); G = G1

print("--------------------------------")

print("4. Looking for maximal cliques in the subgraph.")

C = list(nx.find_cliques(G1))
print("Done.")

cc = [len(c) for c in C]
(h, _) = np.histogram(cc, bins=range(1, 10))
print("Found: {} cliques.".format(len(cc)))
print("Histogram of clique size:", h)

print("--------------------------------")

print("5. Looking for maximal cliques in the whole graph.")

C = list(nx.find_cliques(G))
pickle.dump({'C': C}, open(output_filename_maxcliques, "wb"))
print("Done.")
Example #51
    def run_cycle(self, xds_ascii_files, reference_idx=None):
        if len(xds_ascii_files) == 0:
            print >> self.out, "Error: no files given."
            return

        xscale_inp = os.path.join(self.workdir, "XSCALE.INP")
        xscale_lp = os.path.join(self.workdir, "XSCALE.LP")

        # Get averaged cell for scaling
        sg, cell, lcv, alcv = self.average_cells(xds_ascii_files)
        self.cell_info_at_cycles[self.get_last_cycle_number()] = (cell, lcv,
                                                                  alcv)

        # Choose directory containing XDS_ASCII.HKL and set space group (but how??)
        inp_out = open(xscale_inp, "w")
        inp_out.write("! This XSCALE.INP is generated by kamo.multi_merge.\n")
        inp_out.write(
            "! You may want to use yamtbx.run_xscale to re-run xscale by yourself\n"
        )
        inp_out.write(
            "! because number of characters in line may exceed the limit of xscale.\n"
        )
        inp_out.write("MAXIMUM_NUMBER_OF_PROCESSORS= %d\n" % self.nproc)
        inp_out.write("SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" %
                      (sg, cell))
        inp_out.write(self.xscale_inp_head)

        for i, xds_ascii in enumerate(xds_ascii_files):
            f = self.altfile.get(xds_ascii, xds_ascii)
            tmp = min(os.path.relpath(f, self.workdir),
                      f,
                      key=lambda x: len(x))
            refstr = "*" if i == reference_idx else " "
            inp_out.write(" INPUT_FILE=%s%s\n" % (refstr, tmp))
            if self.d_max is not None:
                d_range = (float("inf") if self.d_max is None else self.d_max,
                           0. if self.d_min is None else self.d_min)
                inp_out.write("  INCLUDE_RESOLUTION_RANGE= %.4f %.4f\n" %
                              d_range)
            if len(self.xscale_params.corrections) != 3:
                inp_out.write("  CORRECTIONS= %s\n" %
                              " ".join(self.xscale_params.corrections))
            if (self.xscale_params.frames_per_batch,
                    self.xscale_params.degrees_per_batch).count(None) < 2:
                xactmp = XDS_ASCII(f, read_data=False)
                frame_range = xactmp.get_frame_range()
                osc_range = xactmp.osc_range
                nframes = frame_range[1] - frame_range[0] + 1
                if self.xscale_params.frames_per_batch is not None:
                    nbatch = int(
                        numpy.ceil(nframes /
                                   self.xscale_params.frames_per_batch))
                else:
                    nbatch = int(
                        numpy.ceil(nframes /
                                   self.xscale_params.degrees_per_batch *
                                   osc_range))
                print >> self.out, "frame range of %s is %d,%d setting NBATCH= %d" % (
                    f, frame_range[0], frame_range[1], nbatch)
                inp_out.write("  NBATCH= %d\n" % nbatch)

        inp_out.close()

        print >> self.out, "DEBUG:: running xscale with %3d files.." % len(
            xds_ascii_files)
        try:
            xscale.run_xscale(xscale_inp,
                              cbf_to_dat=True,
                              aniso_analysis=True,
                              use_tmpdir_if_available=self.xscale_params.
                              use_tmpdir_if_available)
        except:
            print >> self.out, traceback.format_exc()

        xscale_log = open(xscale_lp).read()
        if "!!! ERROR !!! INSUFFICIENT NUMBER OF COMMON STRONG REFLECTIONS." in xscale_log:
            print >> self.out, "DEBUG:: Need to choose files."

            # From XDS ver. March 1, 2015, it kindly informs which dataset has no common reflections.
            # ..but does not print the table. Sometimes only one dataset is left. Should we make table by ourselves?
            # Older versions just print correlation table and stop.
            if "CORRELATIONS BETWEEN INPUT DATA SETS AFTER CORRECTIONS" in xscale_log:
                G = xscalelp.construct_data_graph(xscale_lp,
                                                  min_common_refs=10)
                #nx.write_dot(G, os.path.join(self.workdir, "common_set_graph.dot"))
                cliques = [c for c in nx.find_cliques(G)]
                cliques.sort(key=lambda x: len(x))
                if self._counter == 1:
                    max_clique = cliques[-1]
                else:
                    idx_prevfile = 1 if self.reference_file else 0
                    max_clique = filter(
                        lambda x: idx_prevfile in x,
                        cliques)[-1]  # xscale.hkl must be included!

                if self.reference_file:
                    max_clique = [
                        0,
                    ] + filter(lambda x: x != 0, max_clique)

                for f in "XSCALE.INP", "XSCALE.LP":
                    util.rotate_file(os.path.join(self.workdir, f))

                try_later = map(
                    lambda i: xds_ascii_files[i],
                    filter(lambda x: x not in max_clique, G.nodes()))

                print >> self.out, "DEBUG:: %d files can be merged. %d files will be merged later." % (
                    len(max_clique), len(try_later))
                print >> self.out, "DEBUG:: %d files are of no use." % (
                    len(xds_ascii_files) - len(G.nodes()))
                for i in filter(lambda j: j not in G.nodes(),
                                xrange(len(xds_ascii_files))):
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(map(lambda i: xds_ascii_files[i], max_clique))

                assert len(
                    try_later
                ) <= 0  # Never be the case with newer xscale!! (if the case, check_remove_list() should be modified to skip_num+=1
                if len(try_later) > 0:
                    print >> self.out, "Trying to merge %d remaining files.." % len(
                        try_later)
                    next_files = [os.path.join(self.workdir, "xscale.hkl")
                                  ] + try_later
                    if self.reference_file:
                        next_files = [
                            self.reference_file,
                        ] + next_files
                    self.workdir = self.request_next_workdir()
                    self.run_cycle(next_files)
                    return
            else:
                bad_idxes = xscalelp.read_no_common_ref_datasets(xscale_lp)
                print >> self.out, "DEBUG:: %d files are of no use." % (
                    len(bad_idxes))

                for f in "XSCALE.INP", "XSCALE.LP":
                    util.rotate_file(os.path.join(self.workdir, f))

                # XXX Actually, not all datasets need to be thrown.. some of them are useful..
                for i in bad_idxes:
                    self.removed_files.append(xds_ascii_files[i])
                    self.removed_reason[xds_ascii_files[i]] = "no_common_refls"

                self.run_cycle(
                    map(
                        lambda i: xds_ascii_files[i],
                        filter(lambda j: j not in bad_idxes,
                               xrange(len(xds_ascii_files)))))

            return
        elif "!!! ERROR !!! USELESS DATA ON INPUT REFLECTION FILE" in xscale_log:
            print >> self.out, "DEBUG:: Need to discard useless data."
            unuseful_data = [
                xscalelp.get_read_data(xscale_lp)[-1]
            ]  #filter(lambda x: x[2]==0, xscalelp.get_read_data(xscale_lp))
            if len(unuseful_data) == 0:
                print >> self.out, "I don't know how to fix it.."
                return
            remove_idxes = map(lambda x: x[0] - 1, unuseful_data)
            remove_idxes = self.check_remove_list(remove_idxes)
            keep_idxes = filter(lambda x: x not in remove_idxes,
                                xrange(len(xds_ascii_files)))
            for i in remove_idxes:
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = "useless"

            for f in "XSCALE.INP", "XSCALE.LP":
                util.rotate_file(os.path.join(self.workdir, f))
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
            return
        elif "INACCURATE SCALING FACTORS." in xscale_log:
            # Actually I don't know how to fix this.. (bug?) but worth proceeding (discarding bad data may solve problem).
            print >> self.out, "'INACCURATE SCALING FACTORS' happened.. but ignored."
        elif "!!! ERROR !!!" in xscale_log:
            print >> self.out, "Unknown error! please check the XSCALE.LP and fix the program."
            return

        # Re-scale by changing reference
        rescale_for = None
        if len(self.reject_method) == 0:
            rescale_for = self.reference_choice  # may be None
        elif reference_idx is None:
            rescale_for = "bmed"

        if rescale_for is not None and len(xds_ascii_files) > 1:
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(
                xscale_lp, rescale_for, return_as="index")
            if reference_idx != ref_num:
                print >> self.out, "Rescaling with %s" % rescale_for
                for f in "XSCALE.INP", "XSCALE.LP":
                    util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(xds_ascii_files, reference_idx=ref_num)

        if len(self.reject_method) == 0:
            return

        # Remove bad data
        remove_idxes = []
        remove_reasons = {}

        if self.reject_method[0] == "framecc":
            print >> self.out, "Rejections based on frame CC"
            from yamtbx.dataproc.xds.command_line import xscale_cc_against_merged

            # list of [frame, n_all, n_common, cc] in the same order
            framecc = xscale_cc_against_merged.run(
                hklin=os.path.join(self.workdir, "xscale.hkl"),
                output_dir=self.workdir).values()
            if self.reject_params.framecc.method == "tukey":
                ccs = numpy.array(
                    map(lambda x: x[3], reduce(lambda x, y: x + y, framecc)))
                ccs = ccs[ccs == ccs]  # Remove nan
                q25, q75 = numpy.percentile(ccs, [25, 75])
                cc_cutoff = q25 - self.reject_params.framecc.iqr_coeff * (q75 -
                                                                          q25)
                print >> self.out, " frameCC cutoff = %.4f (%.2f*IQR)" % (
                    cc_cutoff, self.reject_params.framecc.iqr_coeff)
            else:
                cc_cutoff = self.reject_params.framecc.abs_cutoff
                print >> self.out, " frameCC cutoff = %.4f (value specified)" % cc_cutoff

            for i, cclist in enumerate(framecc):
                useframes = map(lambda x: x[0],
                                filter(lambda x: x[3] > cc_cutoff, cclist))
                if len(useframes) == 0:
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                f = xds_ascii_files[i]
                xac = XDS_ASCII(f)
                if set(useframes).issuperset(
                        set(range(min(xac.iframe), max(xac.iframe)))):
                    continue  # All useful frames.

                sel = xac.iframe == useframes[0]
                for x in useframes[1:]:
                    sel |= xac.iframe == x
                if sum(sel) < 10:  # XXX care I/sigma
                    remove_idxes.append(i)
                    remove_reasons.setdefault(i, []).append("allbadframe")
                    continue

                print >> self.out, "Extracting frames %s out of %d-%d in %s" % (
                    ",".join(map(str, useframes)), min(
                        xac.iframe), max(xac.iframe), f)

                newf = self.request_file_modify(f)
                xac.write_selected(sel, newf)

            self.reject_method.pop(0)  # Perform only once

        elif self.reject_method[0] == "lpstats":
            if "bfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >> self.out, "Rejections based on B-factor outliers (%.2f*IQR)" % iqrc
                Bs = numpy.array(
                    map(lambda x: x[1], xscalelp.get_k_b(xscale_lp)))
                if len(Bs) > 1:  # If one data, K & B table is not available.
                    q25, q75 = numpy.percentile(Bs, [25, 75])
                    iqr = q75 - q25
                    lowlim, highlim = q25 - iqrc * iqr, q75 + iqrc * iqr
                    count = 0
                    for i, b in enumerate(Bs):
                        if b < lowlim or b > highlim:
                            remove_idxes.append(i)
                            remove_reasons.setdefault(i, []).append("bad_B")
                            count += 1

                    print >> self.out, " %4d B-factor outliers (<%.2f, >%.2f) removed" % (
                        count, lowlim, highlim)
                else:
                    print >> self.out, " B-factor outlier rejection is not available."

            if "em.b" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >> self.out, "Rejections based on error model b outliers (%.2f*IQR)" % iqrc
                bs = numpy.array(
                    map(lambda x: x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(bs, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc * iqr, q75 + iqrc * iqr
                count = 0
                for i, b in enumerate(bs):
                    if b < lowlim or b > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.b")
                        count += 1

                print >> self.out, " %4d error model b outliers (<%.2f, >%.2f) removed" % (
                    count, lowlim, highlim)

            if "em.ab" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >> self.out, "Rejections based on error model a*b outliers (%.2f*IQR)" % iqrc
                vals = numpy.array(
                    map(lambda x: x[0] * x[1], xscalelp.get_ISa(xscale_lp)))
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc * iqr, q75 + iqrc * iqr
                count = 0
                for i, ab in enumerate(vals):
                    if ab < lowlim or ab > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_em.ab")
                        count += 1

                print >> self.out, " %4d error model a*b outliers (<%.2f, >%.2f) removed" % (
                    count, lowlim, highlim)

            if "rfactor" in self.reject_params.lpstats.stats:
                iqrc = self.reject_params.lpstats.iqr_coeff
                print >> self.out, "Rejections based on R-factor outliers (%.2f*IQR)" % iqrc
                rstats = xscalelp.get_rfactors_for_each(xscale_lp)
                vals = numpy.array(map(lambda x: rstats[x][-1][1],
                                       rstats))  # Read total R-factor
                q25, q75 = numpy.percentile(vals, [25, 75])
                iqr = q75 - q25
                lowlim, highlim = q25 - iqrc * iqr, q75 + iqrc * iqr
                count = 0
                for i, v in enumerate(vals):
                    if v < lowlim or v > highlim:
                        remove_idxes.append(i)
                        remove_reasons.setdefault(i, []).append("bad_R")
                        count += 1

                print >> self.out, " %4d R-factor outliers (<%.2f, >%.2f) removed" % (
                    count, lowlim, highlim)

            if "pairwise_cc" in self.reject_params.lpstats.stats:
                corrs = xscalelp.get_pairwise_correlations(xscale_lp)
                if self.reject_params.lpstats.pwcc.method == "tukey":
                    q25, q75 = numpy.percentile(map(lambda x: x[3], corrs),
                                                [25, 75])
                    iqr = q75 - q25
                    lowlim = q25 - self.reject_params.lpstats.pwcc.iqr_coeff * iqr
                    print >> self.out, "Rejections based on pairwise_cc < %.4f (IQR=%.2f)" % (
                        lowlim, iqr)
                else:
                    lowlim = self.reject_params.lpstats.pwcc.abs_cutoff
                    print >> self.out, "Rejections based on pairwise_cc < %.4f" % lowlim

                bad_corrs = filter(lambda x: x[3] < lowlim, corrs)
                idx_bad = {}
                for i, j, common_refs, corr, ratio, bfac in bad_corrs:
                    idx_bad[i] = idx_bad.get(i, 0) + 1
                    idx_bad[j] = idx_bad.get(j, 0) + 1

                idx_bad = idx_bad.items()
                idx_bad.sort(key=lambda x: x[1])
                count = 0
                for idx, badcount in reversed(idx_bad):
                    remove_idxes.append(idx - 1)
                    remove_reasons.setdefault(idx - 1, []).append("bad_pwcc")
                    bad_corrs = filter(lambda x: idx not in x[:2], bad_corrs)
                    if len(bad_corrs) == 0: break
                    fun_key = lambda x: x[3]
                    print >> self.out, " Removing idx=%d (CC %.3f..%.3f) remaining %d bad pairs" % (
                        idx, min(bad_corrs, key=fun_key)[3],
                        max(bad_corrs, key=fun_key)[3], len(bad_corrs))
                    count += 1
                print >> self.out, " %4d pairwise CC outliers removed" % count

            self.reject_method.pop(0)  # Perform only once
        elif self.reject_method[0] == "delta_cc1/2":
            print >> self.out, "Rejection based on delta_CC1/2 in %s shell" % self.delta_cchalf_bin
            table = xscalelp.read_stats_table(xscale_lp)
            i_stat = -1 if self.delta_cchalf_bin == "total" else -2
            prev_cchalf = table["cc_half"][i_stat]
            prev_nuniq = table["nuniq"][i_stat]
            # file_name->idx table
            remaining_files = collections.OrderedDict(
                map(lambda x: x[::-1], enumerate(xds_ascii_files)))

            # For consistent resolution limit
            inp_head = self.xscale_inp_head + "SPACE_GROUP_NUMBER= %s\nUNIT_CELL_CONSTANTS= %s\n\n" % (
                sg, cell)
            count = 0
            for i in xrange(len(xds_ascii_files) -
                            1):  # if only one file, cannot proceed.
                tmpdir = os.path.join(self.workdir, "reject_test_%.3d" % i)

                cchalf_list = xscale.calc_cchalf_by_removing(
                    wdir=tmpdir,
                    inp_head=inp_head,
                    inpfiles=remaining_files.keys(),
                    stat_bin=self.delta_cchalf_bin,
                    nproc=self.nproc,
                    nproc_each=self.nproc_each,
                    batchjobs=self.batchjobs)

                rem_idx, cc_i, nuniq_i = cchalf_list[
                    0]  # First (largest) is worst one to remove.
                rem_idx_in_org = remaining_files[remaining_files.keys()
                                                 [rem_idx]]

                # Decision making by CC1/2
                print >> self.out, "DEBUG:: cycle %.3d remove %3d if %.2f*%d > %.2f*%d" % (
                    i, rem_idx_in_org, cc_i, nuniq_i, prev_cchalf, prev_nuniq)
                if cc_i * nuniq_i <= prev_cchalf * prev_nuniq: break
                print >> self.out, "Removing idx= %3d gained CC1/2 by %.2f" % (
                    rem_idx_in_org, cc_i - prev_cchalf)

                prev_cchalf, prev_nuniq = cc_i, nuniq_i
                remove_idxes.append(rem_idx_in_org)
                remove_reasons.setdefault(rem_idx_in_org,
                                          []).append("bad_cchalf")
                del remaining_files[remaining_files.keys()
                                    [rem_idx]]  # remove file from table
                count += 1

            print >> self.out, " %4d removed by DeltaCC1/2 method" % count

            if self.next_delta_cchalf_bin != []:
                self.delta_cchalf_bin = self.next_delta_cchalf_bin.pop(0)
            else:
                self.reject_method.pop(0)
        else:
            print >> self.out, "ERROR:: Unsupported reject_method (%s)" % reject_method

        # Remove duplicates
        remove_idxes = list(set(remove_idxes))
        remove_idxes = self.check_remove_list(remove_idxes)
        if len(remove_idxes) > 0:
            print >> self.out, "DEBUG:: Need to remove %d files" % len(
                remove_idxes)
            for i in sorted(remove_idxes):
                print >> self.out, " %.3d %s" % (i, xds_ascii_files[i])
                self.removed_files.append(xds_ascii_files[i])
                self.removed_reason[xds_ascii_files[i]] = ",".join(
                    remove_reasons[i])

        # Next run
        keep_idxes = filter(lambda x: x not in remove_idxes,
                            xrange(len(xds_ascii_files)))
        if len(self.reject_method) > 0 or len(remove_idxes) > 0:
            self.workdir = self.request_next_workdir()
            self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes))
        elif self.reference_choice is not None and len(keep_idxes) > 1:
            # Just re-scale with B reference
            ref_num = xscale.decide_scaling_reference_based_on_bfactor(
                xscale_lp, self.reference_choice, return_as="index")
            if reference_idx != ref_num:
                print >> self.out, "Rescaling2 with %s" % self.reference_choice
                for f in "XSCALE.INP", "XSCALE.LP":
                    util.rotate_file(os.path.join(self.workdir, f))
                self.run_cycle(map(lambda i: xds_ascii_files[i], keep_idxes),
                               reference_idx=ref_num)
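
The lpstats rejection branches above (B-factor, error-model b and a*b, R-factor) all apply the same two-sided Tukey fence, and the frame-CC and pairwise-CC branches use its lower bound only: a value is an outlier if it falls outside [Q25 - c*IQR, Q75 + c*IQR]. A minimal standalone sketch of that rule, with a made-up function name and fabricated data (not part of the excerpt above):

import numpy

def tukey_outlier_indexes(values, iqr_coeff=1.5):
    # Indexes whose value falls outside the fence [Q25 - c*IQR, Q75 + c*IQR]
    values = numpy.asarray(values, dtype=float)
    finite = values[values == values]  # drop NaN, as the frame-CC branch does
    q25, q75 = numpy.percentile(finite, [25, 75])
    iqr = q75 - q25
    lowlim, highlim = q25 - iqr_coeff * iqr, q75 + iqr_coeff * iqr
    return [i for i, v in enumerate(values) if v < lowlim or v > highlim]

print(tukey_outlier_indexes([0.1, -0.3, 0.2, 0.0, 12.0]))  # [4]: only the extreme value is flagged
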
Exemple #52
0
'''
Finding cliques (II)
Great work! Let's continue by finding a particular maximal clique, and then plotting that clique.

Instructions
100 XP
Find the author(s) that are part of the largest maximal clique, and plot the subgraph of that/one of those clique(s) using a CircosPlot. To do this:
Use the nx.find_cliques() function to calculate the maximal cliques in G. Place this within the provided sorted() function to calculate the largest maximal clique.
Create the subgraph consisting of the largest maximal clique using the .subgraph() method and largest_clique.
Create the CircosPlot object using the subgraph G_lc (without any other arguments) and plot it.
'''
SOLUTION
# Import necessary modules
import networkx as nx
from nxviz import CircosPlot
import matplotlib.pyplot as plt

# Find the author(s) that are part of the largest maximal clique: largest_clique
largest_clique = sorted(nx.find_cliques(G), key=lambda x: len(x))[-1]

# Create the subgraph of the largest_clique: G_lc
G_lc = G.subgraph(largest_clique)

# Create the CircosPlot object: c
c = CircosPlot(G_lc)

# Draw the CircosPlot to the screen
c.draw()
plt.show()
Exemple #53
0
fin = open("GameOfThrones.txt", 'rb')
G = nx.read_edgelist('GameOfThrones.txt',
                     nodetype=str,
                     delimiter=",",
                     data=(("weight", int), ("season", int)))
fin.close()

drawGraph(G)
'''
################################################
Step 2 output:
a) No. of maximal cliques
b) Size of largest maximal clique
c) No. of maximal cliques of the largest size
##################################################'''
maxCliques = nx.find_cliques(G)
biggestClique = 0
numBiggestCliques = 0

ctr = 0
for clique in maxCliques:
    ctr += 1
    if (len(clique) > biggestClique):
        biggestClique = len(clique)

print("Number of Maximal Cliques: " + str(ctr))
print("Size of Largest Maximal Clique: " + str(biggestClique))

#Could only loop through generator once, so just remake it
maxCliques = nx.find_cliques(G)
ctr = 0
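
The excerpt above stops before output (c); a minimal continuation sketch that reuses the re-created generator (this continuation is not part of the original snippet):

numBiggestCliques = 0
for clique in maxCliques:
    if len(clique) == biggestClique:
        numBiggestCliques += 1

print("Number of Maximal Cliques of the Largest Size: " + str(numBiggestCliques))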
Exemple #54
0
            c += 1
            end = datetime.now()

    print 'times:', end - start


G = prepare_graph([
    path + 'train_unigram.csv',
    path + 'test_unigram.csv',
])

count = 0
d_nodes = dict()
d_edges = dict()
keywords = []
for c in nx.find_cliques(G):
    l_nodes = len(c)
    # l_edges = len(c.edges())
    if l_nodes <= 2:
        continue
    start = True
    key = []
    for node in c:
        if start:
            key = set(node.split(' '))
            start = False
        else:
            key = set(node.split(' ')).intersection(key)
        if len(key) == 0:
            break
    # print(key)
def construct(DATA, SENTIDIFF_THRES):
    """
    :param DATA: is a DataFrame with columns 'Person, Topic, 'Sentiment', and 'Speech'.
    :param SENTIDIFF_THRES:
    :return:
    """
    # ================================================================================
    # ----- FOR DEBUGGING
    # TIME_FRAME = '2017'
    # METHOD = 'nmf'
    # PATH = f"results/"

    # PARAMETERS
    # text = pd.read_csv(f"{PATH}{TIME_FRAME}/ssm_results_{TIME_FRAME}.csv")
    # thresholds = pd.read_csv(f"{PATH}")
    # ================================================================================
    # ----- Construct Weighted Graph
    startTime = tm.perf_counter()
    G = nx.Graph()
    G.clear()

    # ----- Add nodes
    print('\nAdding nodes for graph...')
    for i in DATA.index:
        row = DATA.loc[i]
        person = row['Person']
        # Only add actor if the actor hasn't already been added
        if not G.has_node(person):
            # Construct dataFrame for text attribute of node
            # Extract all text from the actor
            data = DATA[DATA['Person'] == person]
            data.index = range(len(data))

            # Add node with its corresponding attributes
            G.add_node(
                person,
                gender=row['Gender'],
                party=row['Party'],
                metro=row['Metro'],
                data=data
            )
            # Print progress...
            if i % 50 == 0:
                print(f"{i:{5}} of {len(DATA):{5}}\t{dict(row['Speech_id Date Person Party'.split()])}")
    print('All nodes of graph successfully added!')

    # ----- Add edges
    print('\nAdding edges for graph...')
    for i, row_i in DATA.iterrows():
        # Extract name, topic and sentiment of person1
        p_i = row_i['Person']
        t_i = row_i['Topic']
        s_i = row_i['Senti_comp']

        for j, row_j in DATA[:i+1].iterrows():
            # Extract name, topic and sentiment of person2
            p_j = row_j['Person']
            t_j = row_j['Topic']
            s_j = row_j['Senti_comp']

            # Print progress...
            if (i%50 == 0) and (j%50 == 0):
                print(
                    f"{i:{5}},{j:{5}} of {len(DATA):{5}}\t{p_i:{20}}{p_j:{20}}\tt_i: {int(t_i)}\tt_j: {int(t_j)}")

            # Both actors cannot be the same person
            # Both actors must spoke of the same topic
            # Both sentiment of the same topic must be of the same polarity
            if (p_i != p_j) and (t_i == t_j) and (s_i*s_j > 0):
                # Compute sentiment difference
                sentiDiff = abs(s_i - s_j)
                # Both sentiment towards the topic must be less than the threshold
                if sentiDiff < SENTIDIFF_THRES:
                    # If there is no edge between the two actors, construct one. Otherwise, update the attributes of the existing edge.
                    if not G.has_edge(p_i, p_j):
                        agreedSpeeches = {
                            'topic'    : t_i,
                            'sentiDiff': sentiDiff,
                            'text'     : pd.DataFrame([row_i, row_j])
                        }
                        G.add_edge(p_i, p_j, weight=1, agreedSpeeches=[agreedSpeeches])
                    else:
                        # Extract text from already existing edge
                        edgeData = G.get_edge_data(p_i, p_j)

                        # Compute the new weight for the existing edge
                        weight_old = edgeData['weight']
                        weight_new = weight_old + 1
                        # Construct the new agreedSpeeches dict and add the existing records so the list stays flat
                        agreedSpeeches_old = edgeData['agreedSpeeches']
                        agreedSpeeches_new = [{
                            'topic'    : t_i,
                            'sentiDiff': sentiDiff,
                            'text'     : pd.DataFrame([row_i, row_j])
                        }]
                        agreedSpeeches_new.extend(agreedSpeeches_old)

                        # Update information of the edge
                        G.add_edge(p_i, p_j, weight=weight_new, agreedSpeeches=agreedSpeeches_new)
    print('All edges of graph successfully added!')

    # ================================================================================
    # ----- Compute degree of centrality and add as node attribute
    # Centrality has to be normalised to the max possible number of agreements a node can have.
    # This is computed as (number of speeches made by the actor) * [(total number of speeches) - (number of speeches made by the actor)]
    # G.degree() returns the number of edges adjacent to a node, taking the edge weight into account
    cent = {n: G.degree(n, weight='weight') for n in list(G.nodes)}
    cent = pd.DataFrame.from_dict(cent, orient='index', columns='degree'.split())

    # Compute number of speeches each actor have made
    actorSpeechCnt = {}
    for n in list(G.nodes):
        actorSpeechCnt[n] = len(DATA[DATA['Person'] == n])
    # Compute normalised degree of centrality
    cent_norm = {}
    for n in list(G.nodes):
        cent_max = actorSpeechCnt[n]*(len(DATA) - actorSpeechCnt[n])
        cent_norm[n] = cent['degree'].loc[n]/cent_max
    cent_norm = pd.DataFrame.from_dict(cent_norm, orient='index', columns='centrality'.split())
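    # Illustration of the normalisation above with made-up numbers: an actor with 3 speeches
    # in a 100-speech corpus has cent_max = 3 * (100 - 3) = 291, so a weighted degree of 29
    # corresponds to a normalised centrality of 29 / 291 ~ 0.0997.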

    # Place the normalised centrality in the dataFrame and sort by it
    cent['centrality'] = cent_norm
    cent.sort_values(by='centrality', ascending=False, inplace=True)

    # Add centrality information to node attribute
    nx.set_node_attributes(G, cent['centrality'], 'centrality')

    # ================================================================================
    # ----- Compute cliques and add clique group number as node attribute
    # Construct a dictionary containing cliques within the network labeled by its clique#
    cliqueList = list(enumerate(nx.find_cliques(G)))

    # For every actor in the network, search all networkCliques to find if the actor is in it
    # Return a dict of actors and the corresponding clique# that the actor is in
    cliqueNum = {}
    actors = np.sort(list(G.nodes))
    for p in actors:
        inClique = []
        # cliqueList already pairs each clique with its index, so unpack the pairs directly
        for i, clq in cliqueList:
            if p in clq:
                inClique.append(i)
        cliqueNum[p] = inClique

    # Add clique information to node attribute
    nx.set_node_attributes(G, cliqueNum, 'cliques')

    dur = tm.gmtime(tm.perf_counter() - startTime)
    print(f"\nGraph construction complete!")
    print(f"Construction took {dur.tm_sec}s")
    print(f"{len(cliqueList)} cliques found")
    # Print percentage of edges removed by threshold


    # =====================================================================================
    # ----- FOR DEBUGGING
    # # Save results
    # nx.write_gpickle(G, f"{PATH}{TIME_FRAME}/ssm_weightedGraph_{TIME_FRAME}.gpickle")
    # cent.to_csv(f"{PATH}{TIME_FRAME}/ssm_centrality_{TIME_FRAME}.csv")
    # with open(f"{PATH}{TIME_FRAME}/ssm_cliques_{TIME_FRAME}.pickle", "wb") as file:
    #     pickle.dump(cliques, file)
    # ================================================================================

    return G, cent, cliqueList
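
A minimal usage sketch for construct() with a fabricated two-speech DataFrame; the column names are taken from the function body, and the sketch assumes the module-level imports the function relies on (networkx as nx, numpy as np, pandas as pd, time as tm) and a recent NetworkX/pandas:

import pandas as pd

toy_data = pd.DataFrame([
    {'Speech_id': 1, 'Date': '2017-11-16', 'Person': 'Speaker A', 'Gender': 'F',
     'Party': 'Party X', 'Metro': 1, 'Topic': 0, 'Senti_comp': 0.6},
    {'Speech_id': 2, 'Date': '2017-11-17', 'Person': 'Speaker B', 'Gender': 'M',
     'Party': 'Party Y', 'Metro': 0, 'Topic': 0, 'Senti_comp': 0.4},
])

# Same topic, same polarity, |0.6 - 0.4| = 0.2 < 0.5, so one agreement edge is expected
G, cent, cliqueList = construct(toy_data, SENTIDIFF_THRES=0.5)
print(G.number_of_edges())  # expected: 1
print(len(cliqueList))      # expected: 1 maximal clique containing both speakers
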
g.add_nodes_from(words)  #add the words we identified as nodes

for i in range(0, cooc_upper.shape[0]):
    for j in range(0, cooc_upper.shape[1]):
        if cooc_upper[i, j] == 1:  # add an edge only if the two words co-occur
            g.add_edge(words[i], words[j])

# Remove nodes with no edges
degree = g.degree()
for n in list(g.nodes()):  # copy the node list so nodes can be removed while iterating
    if degree[n] == 0:
        g.remove_node(n)

#Find maximal cliques and visualise

coords = nx.spring_layout(g)

# remove "len(clique)>2" if you're interested in maxcliques with 2 edges
cliques = [clique for clique in nx.find_cliques(g) if len(clique) > 2]

#draw the graph

for clique in cliques:
    print "Clique to appear: ", clique
    H = g.subgraph(clique)
    col = colors.next()
    nx.draw_networkx(H, pos=coords, node_color=col, with_labels=True)
    plt.show()
    plt.clf()
Exemple #57
0
 def test_find_cliques1(self):
     cl = list(nx.find_cliques(self.G))
     rcl = nx.find_cliques_recursive(self.G)
     expected = [[2, 6, 1, 3], [2, 6, 4], [5, 4, 7], [8, 9], [10, 11]]
     assert_equal(sorted(map(sorted, cl)), sorted(map(sorted, rcl)))
     assert_equal(sorted(map(sorted, cl)), sorted(map(sorted, expected)))
def k_clique_communities(G, k, cliques=None):
    """Find k-clique communities in graph using the percolation method.

    A k-clique community is the union of all cliques of size k that
    can be reached through adjacent (sharing k-1 nodes) k-cliques.

    Parameters
    ----------
    G : NetworkX graph

    k : int
       Size of smallest clique

    cliques: list or generator       
       Precomputed cliques (use networkx.find_cliques(G))

    Returns
    -------
    Yields sets of nodes, one for each k-clique community.

    Examples
    --------
    >>> G = nx.complete_graph(5)
    >>> K5 = nx.convert_node_labels_to_integers(G,first_label=2)
    >>> G.add_edges_from(K5.edges())
    >>> c = list(nx.k_clique_communities(G, 4))
    >>> list(c[0])
    [0, 1, 2, 3, 4, 5, 6]
    >>> list(nx.k_clique_communities(G, 6))
    []

    References
    ----------
    .. [1] Gergely Palla, Imre Derényi, Illés Farkas, and Tamás Vicsek,
       Uncovering the overlapping community structure of complex networks
       in nature and society. Nature 435, 814-818, 2005,
       doi:10.1038/nature03607
    """
    if k < 2:
        raise nx.NetworkXError("k=%d, k must be greater than 1." % k)
    if cliques is None:
        cliques = nx.find_cliques(G)
    cliques = [frozenset(c) for c in cliques if len(c) >= k]

    # First index which nodes are in which cliques
    membership_dict = defaultdict(list)
    for clique in cliques:
        for node in clique:
            membership_dict[node].append(clique)

    # For each clique, see which adjacent cliques percolate
    perc_graph = nx.Graph()
    perc_graph.add_nodes_from(cliques)
    for clique in cliques:
        for adj_clique in _get_adjacent_cliques(clique, membership_dict):
            if len(clique.intersection(adj_clique)) >= (k - 1):
                perc_graph.add_edge(clique, adj_clique)

    # Connected components of clique graph with perc edges
    # are the percolated cliques
    for component in nx.connected_components(perc_graph):
        yield (frozenset.union(*component))
Exemple #59
0
 def test_directed(self):
     cliques = nx.find_cliques(nx.DiGraph())
Exemple #60
0
topology = iu.module_from_spec(spec)
spec.loader.exec_module(topology)


def betti(C):
    return topology.betti_bin_cpp(C, worker="../cpp/UV/rank")
    #return topology.betti_bin(C)


import networkx as nx
import numpy as np
import itertools

G = nx.fast_gnp_random_graph(15, 0.6, seed=0)

C = list(nx.find_cliques(G))
H = nx.make_max_clique_graph(G)
assert (H.number_of_nodes() == len(C))

for (a, b) in H.edges():
    assert (len(set(C[a]).intersection(C[b])))

print("Full:")
print("G:", betti(nx.find_cliques(G)))
print("H:", betti(nx.find_cliques(H)))

H.remove_nodes_from([n for n in H.nodes() if (len(C[n]) <= 3)])
print("After removal:")
print("G:", betti(nx.find_cliques(G)))
print("H:", betti(nx.find_cliques(H)))