def communities(self, nCommunities, weight=None):
		"""
		Compute communities.

		Parameters
		----------
		nCommunities - number of communities to be returned.
			This is added to simplify the process; the original Girvan-Newman
			algorithm does not need a predecided number of communities.
			Other measures, such as a threshold on betweenness centrality,
			can be used instead.

		weight (string) - If None, all edge weights are considered equal.
			Otherwise holds the name of the edge attribute used as weight.


		Returns
		--------
		A list of communities, where each community is a list of the nodes in
		that community.
		"""
		gr = self.g
		n = nx.number_connected_components(gr)
		components = nx.connected_components(gr)

		while n < nCommunities:
			gr = self.communitySplits(gr, weight=weight)
			components = nx.connected_components(gr)
			n = nx.number_connected_components(gr)
			if gr.number_of_edges() == 0:
				break
		return [list(c) for c in components]
def communitySplits(self, graph, weight=None):
		"""
		Compute the splits for the formation of communities.

		Arguments:
			graph - A networkx graph or digraph.
			weight - Optional name of the edge attribute used as weight.

		Returns:
			The graph with the highest-betweenness edges removed.
		"""

		nConnComp = nx.number_connected_components(graph)
		nComm = nConnComp

		while nComm <= nConnComp:
			betweenness = nx.edge_betweenness_centrality(graph, weight=weight)
			if betweenness:
				max_betweenness = max(betweenness.values())
			else:
				break
			for u, v in list(betweenness.items()):
				if float(v) == max_betweenness:
					graph.remove_edge(u[0], u[1])
			nComm = nx.number_connected_components(graph)
		return graph
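For comparison, recent networkx releases (2.x and later) ship a built-in Girvan-Newman generator that yields successively finer partitions; a minimal sketch of stopping it at a fixed number of communities, analogous to the nCommunities parameter above:

import networkx as nx
from networkx.algorithms.community import girvan_newman

G = nx.karate_club_graph()
nCommunities = 3
for communities in girvan_newman(G):
    if len(communities) >= nCommunities:
        break
print([sorted(c) for c in communities])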
def detectBetweenness(G, numClusters, sites, bipartite):
	Gnew = copy.deepcopy(G)
	numComponents = nx.number_connected_components(Gnew)

	betweenness = nx.edge_betweenness_centrality(Gnew, weight='capacity')
	pickle.dump(betweenness, open("betweennessUnipartite.p", "wb"))
	#betweenness = pickle.load(open("betweennessUnipartite.p", "rb"))

	while numComponents < numClusters:
		print("num components is now", numComponents)

		# calculate betweenness of each edge
		betweenness = nx.edge_betweenness_centrality(Gnew, weight='capacity')

		# identify and remove the edge(s) with highest betweenness
		max_ = max(betweenness.values())
		for k, v in list(betweenness.items()):
			if float(v) == max_:
				Gnew.remove_edge(k[0], k[1])
		numComponents = nx.number_connected_components(Gnew)

	clusters = {}
	i = 0
	j = 0
	for component in list(nx.connected_components(Gnew)):
		for node in component:
			if node in sites:
				clusters[node] = i
				j += 1
		print(j, "nodes in cluster", i)
		j = 0
		i += 1

	return clusters
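A hypothetical invocation of detectBetweenness (the karate club graph with uniform 'capacity' weights, every node treated as a site; the bipartite argument is unused in the excerpt above):

import copy
import pickle
import networkx as nx

G = nx.karate_club_graph()
nx.set_edge_attributes(G, 1.0, 'capacity')  # uniform capacities for the demo
clusters = detectBetweenness(G, 3, sites=set(G.nodes()), bipartite=False)
print(clusters)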
Example #4
def deleteExtraEdges(cg, b, VERBOSE=False):
    ndist = {}
    numConnected = nx.number_connected_components(cg)
    if VERBOSE:
        print("number of nodes is", cg.number_of_nodes())
    for n in list(cg.neighbors(b)):  # copy the neighbor list; edges are modified below
        # test whether deleting the edge between n and b increases
        # the number of connected components
        cg.remove_edge(b, n)
        newNumConnected = nx.number_connected_components(cg)
        if newNumConnected == numConnected:  # then this could be a valid deletion
            # compute the Euclidean distance from b to its neighbor n
            if VERBOSE:
                print("the edge between %s and %s can be cut without changing the topology of the graph" % (b, n))
            ndist[(b, n)] = math.sqrt((n[0]-b[0])**2 + (n[1]-b[1])**2 + (n[2]-b[2])**2)
        cg.add_edge(b, n)
    if ndist:
        items = list(ndist.items())
        # rearrange (edge, distance) pairs so we can sort on distance
        k, v = list(zip(*items))
        items = list(zip(v, k))
        maxNeighbor = max(items)
        # cut the longest edge that is valid to cut
        if VERBOSE:
            print("removing edge", maxNeighbor[1][0], maxNeighbor[1][1])
        cg.remove_edge(maxNeighbor[1][0], maxNeighbor[1][1])
        cg = deleteExtraEdges(cg, b, VERBOSE)
    return cg
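A minimal usage sketch, assuming nodes are 3-D coordinate tuples as the distance computation above requires; in this triangle every edge is redundant, and the longest cuttable edge incident to b is removed:

import math
import networkx as nx

cg = nx.Graph()
a, b, c = (0, 0, 0), (1, 0, 0), (0, 2, 0)
cg.add_edges_from([(a, b), (b, c), (a, c)])  # a 3-cycle: every edge is redundant

cg = deleteExtraEdges(cg, b, VERBOSE=True)
print(sorted(cg.edges()))  # the b-c edge (the longest at b) is gone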
Example #5
def general_fiedler(G, k, trials, plotname):
    '''Number of components when you apply the threshold cut on a random vector in the span of the first k eigenvectors'''
    v = keigenvectors(G, k)
    print(v)
    flag = 1
    x_data = []
    y_data = []
    for i in range(trials):
        z = randomvector(v)
        (y1, y2) = thresholdcut(z, 0)
        H1 = G.subgraph(y1)
        n1 = nx.number_connected_components(H1)
        H2 = G.subgraph(y2)
        n2 = nx.number_connected_components(H2)
        n = min(n1, n2)
        x_data.append(i)
        y_data.append(n)
        if n > k - 1:
            flag = 0
            print('Number of components: ' + str(n))
            print('z = ' + str(z))
    if flag:
        print('Not found, number of components: ' + str(n))
    k_data = [k - 1 for x in x_data]
    plt.plot(x_data, y_data, 'ro')
    plt.plot(x_data, k_data, linewidth=2)
    plt.axis([0, trials, 0, k + 10])
    plt.savefig(plotname)
def get_bridges(graph):
    all_edges = list(graph.edges(keys=True, data=True))  # snapshot: the graph is mutated below
    for e in all_edges:
        graph.remove_edge(*e[:-1])
        removed_comps = nx.number_connected_components(graph)
        graph.add_edge(*e[:-1], **e[-1])  # restore the edge, keeping its original key and data
        if nx.number_connected_components(graph) < removed_comps:
            yield e
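A quick sanity check on a small multigraph; for simple graphs, networkx's built-in nx.bridges(G) reports the same edges directly:

import networkx as nx

g = nx.MultiGraph()
g.add_edges_from([(0, 1), (0, 1), (1, 2)])  # the doubled 0-1 edge is never a bridge

print(list(get_bridges(g)))  # only the 1-2 edge should be reported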
def get_number_of_components(filename):
  import networkx as nx
  f = open(filename[:-4] + '_components.dat', 'w')
  for i in range(0, 101):
    threshold = float(i) / 100
    G = get_threshold_matrix(filename, threshold)
    print('number of connected components:', nx.number_connected_components(G))
    f.write("%f\t%d\n" % (threshold, nx.number_connected_components(G)))
  f.close()
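get_threshold_matrix() is not shown in this excerpt; a plausible reconstruction (an assumption, not the original helper) reads a square weight matrix from a text file and keeps edges above the threshold:

import numpy as np
import networkx as nx

def get_threshold_matrix(filename, threshold):
    # hypothetical: whitespace-separated square matrix of edge weights
    A = np.loadtxt(filename)
    A = np.where(A > threshold, 1, 0)  # binarize at the threshold
    np.fill_diagonal(A, 0)             # drop self-loops
    return nx.from_numpy_array(A)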
def Girvannewman(G):
    initialcomp = nx.number_connected_components(G)
    '''totalnumcomp = initialcomp
    while totalnumcomp <= initialcomp:'''
    bw = nx.edge_betweenness_centrality(G)
    maximum_value = max(bw.values())
    for key, value in list(bw.items()):
        if float(value) == maximum_value:
            G.remove_edge(key[0], key[1])
    totalnumcomp = nx.number_connected_components(G)
Example #9
def convert_to_lineage():
	inf_modes = ['incidence_p', 'incidence_c']
	exits = ['c_to_death', 'remove_s', 'remove_p', 'remove_c']

	parent = "/home/ethan/Dropbox/pkl/"
	index = 0

	for file in os.listdir(parent):
		print(file)
		infile = open(parent + file, 'rb')

		lineage = nx.DiGraph(weighted=True)
		abm = pickle.load(infile)
		tree = abm.tree
		history = abm.agent_history
		infected = sorted(tree.nodes(), key=lambda x: x.i_time)
		terminal_map = {}

		# keep only infected agents that appear in the history
		infected = [i for i in infected if i in history]

		for i in infected:
			out = []
			out.append(i)
			nei = sorted(tree.neighbors(i), key=lambda x: x.i_time)
			for n in nei:
				if n.i_time > i.i_time:
					out.append(n)

			end_time = 5000

			terminus = Agent()
			terminus.i_time = end_time
			terminus.ID = i.ID

			for event in history[i]:
				if event[0] in exits:
					terminus.i_time = event[1]

			out.append(terminus)
			terminal_map[i] = terminus

			for x in range(len(out) - 1):
				lineage.add_edge(out[x], out[x + 1], data=abs(out[x].i_time - out[x + 1].i_time))

		dic = {'lineage': lineage, 'history': history, 'terminal map': terminal_map}
		out = open(parent + 'lin' + str(index) + '.pkl', 'wb')
		pickle.dump(dic, out)
		print(nx.number_connected_components(lineage.to_undirected()), nx.number_connected_components(tree))
		infile.close()
		out.close()
		index += 1
Example #10
 def info(self):
     print("============================")
     print(nx.info(self.G))
     print("============================")
     #print("degree distribution: ")
     #print(nx.degree_histogram(self.G))
     print("============================")
     print("number of connected components:")
     if self.directed_graph == False:
         print(nx.number_connected_components(self.G))
     print("============================")
Example #11
def CmtyGirvanNewmanStep(G):
    init_ncomp = nx.number_connected_components(G)    # no. of components
    ncomp = init_ncomp
    while ncomp <= init_ncomp:
        bw = nx.edge_betweenness_centrality(G, weight='weight')    # edge betweenness for G
        # find the edge(s) with max centrality and remove all of them if there is more than one
        max_ = max(bw.values())
        for k, v in list(bw.items()):
            if float(v) == max_:
                G.remove_edge(k[0], k[1])    # remove the central edge
        ncomp = nx.number_connected_components(G)    # recalculate the no. of components
Example #12
def getTrafficConnectedComponentGraph(G):
    H = G.copy()
    to_remove = []
    for (s, d) in H.edges():
        if H[s][d]['weight'] <= 2:
            to_remove.append((s, d))
    H.remove_edges_from(to_remove)
    #print(list(networkx.connected_components(H)))
    print(networkx.number_connected_components(H))
    Gc = max((H.subgraph(c) for c in networkx.connected_components(H)), key=len)
    #drawGraph(Gc, connected=True)
    return Gc
Example #13
 def IsDivided(fragment):
     nodes = set()
     for ring in fragment:
         nodes |= set(_rings[ring])
     G2 = _G.copy()
     ebunch = []  # unused unless the edge-removal variant below is enabled
     for i in nodes:
         for j in _G.neighbors(i):
             ebunch.append((i, j))
     #G2.remove_edges_from(ebunch)
     G2.remove_nodes_from(nodes)
     logging.debug("NCOMPO: {0} {1}".format(nx.number_connected_components(G2), _ncompo))
     return nx.number_connected_components(G2) > _ncompo
Example #14
    def plot_additional(self, home_nodes, levels=0):
        """Add nodes to existing plot.  Prompt to include link to existing
        if possible.  home_nodes are the nodes to add to the graph"""

        new_nodes = self._neighbors(home_nodes, levels=levels)
        new_nodes = home_nodes.union(new_nodes)

        displayed_data_nodes = set([ v['dataG_id']
                            for k,v in self.dispG.nodes(data=True) ])

        # It is possible the new nodes create a connection with the existing
        #  nodes; in such a case, we don't need to try to find the shortest
        #  path between the two blocks
        current_num_islands = nx.number_connected_components(self.dispG)
        new_num_islands = nx.number_connected_components(
            self.dataG.subgraph(displayed_data_nodes.union(new_nodes)))
        if new_num_islands > current_num_islands:
            # Find shortest path between two blocks graph and, if it exists,
            #  ask the user if they'd like to include those nodes in the
            #  display as well.
            # First, create a block model of our data graph where what is
            #  currently displayed is one block and the new nodes are another block
            all_nodes = set(self.dataG.nodes())
            singleton_nodes = all_nodes - displayed_data_nodes - new_nodes
            singleton_nodes = map(lambda x: [x], singleton_nodes)
            partitions = [displayed_data_nodes, new_nodes] + \
                         list(singleton_nodes)
            B = nx.blockmodel(self.dataG, partitions, multigraph=True)

            # Find shortest path between existing display (node 0) and
            #  new display island (node 1)
            try:
                path = nx.shortest_path(B, 0, 1)
            except nx.NetworkXNoPath:
                pass
            else:
                ans = tkm.askyesno("Plot path?", "A path exists between the "
                  "current graph and the nodes you've asked to be added "
                  "to the display.  Would you like to plot that path?")
                if ans: # Yes to prompt
                    # Add the nodes from the source graph which are part of
                    #  the path to the new_nodes set
                    # Don't include end points because they are the two islands
                    for u in path[1:-1]:
                        Gu = B.node[u]['graph'].nodes()
                        assert len(Gu) == 1; Gu = Gu[0]
                        new_nodes.add(Gu)

        # Plot the new nodes
        self._plot_additional(new_nodes)
def _remove_max_edge(G, weight=None):
    """
    Remove the edge(s) with the highest betweenness centrality.
    Repeat until the graph has more connected components than the
    original graph did.
    """
    number_components = nx.number_connected_components(G)
    while nx.number_connected_components(G) <= number_components and G.number_of_edges():
        betweenness = nx.edge_betweenness_centrality(G, weight=weight)
        max_value = max(betweenness.values())
        # Use a list of edges because G is changed in the loop
        for edge in list(G.edges()):
            if betweenness[edge] == max_value:
                G.remove_edge(*edge)
def girvan_newman_step(G):
    '''
    INPUT: Graph G
    OUTPUT: None

    Run one step of the Girvan-Newman community detection algorithm.
    Afterwards, the graph will have one more connected component.
    '''
    init_ncomp = nx.number_connected_components(G)
    ncomp = init_ncomp
    while ncomp == init_ncomp:
        bw = Counter(nx.edge_betweenness_centrality(G))
        a, b = bw.most_common(1)[0][0]
        G.remove_edge(a, b)
        ncomp = nx.number_connected_components(G)
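A short usage sketch: repeatedly applying the step splits Zachary's karate club graph into more and more communities (Counter and networkx must be imported as the function requires):

import networkx as nx
from collections import Counter

G = nx.karate_club_graph()
while nx.number_connected_components(G) < 3:
    girvan_newman_step(G)
print([len(c) for c in nx.connected_components(G)])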
Example #17
def sensi_diameter(G):
    """
    Compute graph sensitivity to node removal, in terms of
    the difference in graph diameter on the removal of each
    node in turn.

    This uses local function x_diameter(G), which is modified
    from networkx.diameter(G) to work on XGraphs.

    DL Urban (9 Feb 2007)
    """
    import networkx as nx

    # Starting diameter for full graph:

    nc = nx.number_connected_components(G)      # how many components are there?
    if nx.is_connected(G):
        d0 = x_diameter(G)
    else:
        G0 = G.subgraph(max(nx.connected_components(G), key=len))  # the largest subgraph
        d0 = x_diameter(G0)

    sensi = {}

    for node in list(G.nodes()):
        ex = list(G.edges(node))        # the edges adjacent to node;
        G.remove_edges_from(ex)         # remove all of these,
        G.remove_node(node)             # and then remove the node, too
        if nx.is_connected(G):
            dx = x_diameter(G)
            cuts = 0
        else:
            Gx = G.subgraph(max(nx.connected_components(G), key=len))  # the biggest component
            ncx = nx.number_connected_components(G)
            if nc == ncx:
                cuts = 0
            else:
                cuts = 1
            dx = x_diameter(Gx)
        delta = d0 - dx
        G.add_node(node)                # put the node and edges back again
        G.add_edges_from(ex)
        sensi[node] = (cuts, delta)

    # each value is a tuple (cuts, delta)
    return sensi
def get_single_network_measures(G, thr):
	f = open(out_prfx + 'single_network_measures.dat', 'a')
	N = nx.number_of_nodes(G)
	L = nx.number_of_edges(G)
	D = nx.density(G)
	cc = nx.average_clustering(G)
	compon = nx.number_connected_components(G)
	Con_sub = [G.subgraph(c) for c in nx.connected_components(G)]

	values = []
	values_2 = []

	for node in G:
		values.append(G.degree(node))
	ave_deg = float(sum(values)) / float(N)

	f.write("%f\t%d\t%f\t%f\t%f\t%d\t" % (thr, L, D, cc, ave_deg, compon))
	#1. threshold, 2. edges, 3. density, 4. clustering coefficient,
	#5. average degree, 6. number of connected components

	for i in range(len(Con_sub)):
		if nx.number_of_nodes(Con_sub[i]) > 1:
			values_2.append(nx.average_shortest_path_length(Con_sub[i]))

	if len(values_2) == 0:
		f.write("0.\n")
	else:
		f.write("%f\n" % (sum(values_2) / len(values_2)))
	#7. average shortest path length
	f.close()
def print_info(G):
  #info prints name, type, number of nodes and edges, and average degree already
  print(nx.info(G))
  print("Density: ", nx.density(G))
  print("Number of connected components: ", nx.number_connected_components(G))

  all_degree_cent = nx.degree_centrality(G)
  all_bet_cent = nx.betweenness_centrality(G)
  all_close_cent = nx.closeness_centrality(G)

  oldest = []
  agerank = 0

  names = []

  print("Node, Degree Centrality, Betweenness Centrality, Closeness Centrality:")
  for x in range(G.number_of_nodes()):
    data = G.nodes[x]
    names.append(data['label'])

    # track every node tied for the highest agerank
    if data['agerank'] > agerank:
      agerank = data['agerank']
      oldest = [data]
    elif data['agerank'] == agerank:
      oldest.append(data)

    print(data['label'], ' %.2f' % all_degree_cent.get(x),
          ' %.2f' % all_bet_cent.get(x),
          ' %.2f' % all_close_cent.get(x))

  print("Oldest facebook(s): ", ', '.join([x['label'] for x in oldest]))

  return names
Example #20
def graph_comp_sequence(Gts):
    """
    Gts is a graph thresholding sequence, a dictionary of graphs
    keyed by threshold distance, see edge_threshold_sequence().
    This function takes that sequence and returns the number of
    components in each graph, along with the diameter of the
    largest component in each graph. The output is a dictionary of
    tuples (NC, D(G)) keyed by threshold distance.

    Requires:  x_diameter(G), local function.

    Usage:  The output is intended to be printed to a file (see
    write_table.txt for syntax), so that a plot can be constructed
    that illustrates the number of components and graph diameter
    as a function of distance.

    DL Urban (22 Feb 2007)
    """
    import networkx as nx

    seq = Gts.keys()
    gcs = {}
    for d in seq:
        g = Gts[d]
        if nx.is_connected(g):
            nc = 1
            diam = x_diameter(g)
        else:
            nc = nx.number_connected_components(g)
            # the largest connected component:
            gc = g.subgraph(max(nx.connected_components(g), key=len))
            diam = x_diameter(gc)
        gcs[d] = (nc, diam)
    return gcs
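edge_threshold_sequence() is referenced but not included here; a plausible sketch (an assumption, not DL Urban's original) builds the dictionary of thresholded graphs from a weighted distance graph:

import networkx as nx

def edge_threshold_sequence(G, thresholds):
    # hypothetical: for each threshold distance d, keep only the edges
    # whose 'weight' (a distance) is at most d
    Gts = {}
    for d in thresholds:
        H = nx.Graph()
        H.add_nodes_from(G.nodes())
        H.add_edges_from((u, v) for u, v, w in G.edges(data='weight') if w <= d)
        Gts[d] = H
    return Gts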
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('graph')

    args = parser.parse_args()
    #vertices, edges = read_graph_from_file(args.graph)
    G = nx.read_edgelist(args.graph)
    n = G.number_of_nodes()
    print("nodes:", n)
    print("edges:", G.number_of_edges())
    core_exponent = 0.5
    core_vertices = [v for v in G.nodes() if G.degree(v) >= n**core_exponent]
    print("core vertices:", len(core_vertices))
    core = G.subgraph(core_vertices)
    print("number of connected components in core:", nx.number_connected_components(core))

    # BFS-traversal
    fringe_fraction = 0.1
    max_fringe_size = int(n * fringe_fraction)
    core_vertices = set(core_vertices)
    for i in range(int(1/fringe_fraction) + 1):
        fringe_vertices = set(sorted(fringe(G, core_vertices), key=lambda v: -G.degree(v))[:max_fringe_size])
        if not fringe_vertices:
            break
        print("{}: core={}, fringe={}".format(i+1, len(core_vertices), len(fringe_vertices)))
        core_vertices |= fringe_vertices
 def stats(self):
     '''
     Return all other stats
     Params:
         None
     Returns:
         dictionary of stats with keys(stats supported):
             num_connections - total number of connections
             max_degree - degree of highest degree node
             mean_degree - average degree
             empty - number of nodes with no connections
             variance - variance in degree distribution
             odd_length - odd length cycle exist? - Not implemented
             num_connected_components - number of connected components
             any_frac - fraction of nodes with connection(s)
             big_frac - fraction of nodes in the largest connected group
     '''
     output = {}
     degrees = self.degree_dist()
     output['num_connections'] = int(sum([i * degrees[i] for i in degrees]) / 2.0)
     output['max_degree'] = max(degrees.keys())
     output['mean_degree'] = sum([i * degrees[i] for i in degrees]) / float(self.size) if self.size else 0
     output['empty'] = not output['max_degree']
     output['variance'] = sum([degrees[degree] * (degree - output['mean_degree'])**2 for degree in degrees]) / float(self.size) if self.size else 0
     # output['odd_length'] = 'Not implemented'
     output['num_connected_components'] = nx.number_connected_components(self.G)
     output['any_frac'] = sum([degrees[i] for i in degrees if i != 0]) / float(self.size) if self.size else 0
     if not output['empty']:
         largest_cc = max(nx.connected_components(self.G), key=len)
         num_in_greatest = len(largest_cc)  # nodes in the largest connected component
         output['big_frac'] = num_in_greatest / float(self.size) if self.size else 0
     else:
         output['big_frac'] = 0
     return output
Example #23
def graphToCSV(G, graphtype, section, test):
    directory = "Datarows/" + graphtype + "/"
    if not os.path.exists(directory):
        os.makedirs(directory)
    writer_true = csv.writer(open(directory + section + "_true.csv", "a"))
    writer_false = csv.writer(open(directory + section + "_false.csv", "a"))
    A = nx.to_numpy_matrix(G)
    A = np.reshape(A, -1)
    arrGraph = np.squeeze(np.asarray(A))

    # count the nodes that have at least one neighbor
    nb_nodes = 0
    for node in G.nodes():
        if G.degree(node) > 0:
            nb_nodes += 1

    meta_info = [test, nb_nodes, G.number_of_edges(), nx.number_connected_components(G)]
    # Keep the true and false classes the same size
    if test:
        if os.path.getsize(directory + section + "_true.csv") <= os.path.getsize(directory + section + "_false.csv"):
            writer_true.writerow(np.append(arrGraph, meta_info))
            return True
        else:
            return False
    else:
        if os.path.getsize(directory + section + "_false.csv") <= os.path.getsize(directory + section + "_true.csv"):
            writer_false.writerow(np.append(arrGraph, meta_info))
            return True
        else:
            return False
def genMutants(G, params):
    """ 
    Returns a list of mutant networks obtained from the given network G, 
    using the mutation parameters in params.
    """
    Vcount = len(G)
    Ecount = len(G.edges())
    mutants = []
    for i in range(params["mutantsPerEpoch"]):
        mutantG = G.copy()
        rewirings = 0
        while rewirings <= params["rewiringsPerMutant"]:
            u, v = list(mutantG.edges())[random.randrange(Ecount)]
            uNew = random.choice([u, v])
            vNew = random.randrange(Vcount)
            if uNew == vNew or mutantG.has_edge(uNew, vNew):
                continue
            mutantG.remove_edge(u, v)
            mutantG.add_edge(uNew, vNew)
            # reject the rewiring if it disconnects the graph
            if networkx.number_connected_components(mutantG) > 1:
                mutantG.remove_edge(uNew, vNew)
                mutantG.add_edge(u, v)
            else:
                rewirings += 1
        mutants.append(mutantG)
    return mutants
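A minimal usage sketch (parameter names taken from the function above; the ring lattice keeps the graph connected, so rewirings can succeed):

import random
import networkx

params = {"mutantsPerEpoch": 5, "rewiringsPerMutant": 10}
G = networkx.watts_strogatz_graph(30, 4, 0.0)  # a connected ring lattice
mutants = genMutants(G, params)
print(len(mutants), all(networkx.is_connected(m) for m in mutants))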
	def constructPedigreeGraphFromOneFile(self, inputFname=None):
		"""
		2013.3.5 replace nx.DiGraph with custom DiGraphWrapper
		2012.8.14
		"""
		sys.stderr.write("Constructing pedigree-graph out of %s ..."%(inputFname))
		DG=DiGraphWrapper()
		childNodeSet = set()
		reader = MatrixFile(inputFname)
		
		counter = 0
		for row in reader:
			if counter == 0 and self.p_char.search(row[0]):	# a letter in the first cell of the first line means it's a header; skip it
				continue
			childID = row[self.childColumnIndex]
			DG.add_node(childID)	#in case this guy has no parents, then won't be added via add_edge()
			childNodeSet.add(childID)
			fatherID = row[self.fatherColumnIndex]
			if fatherID!='0':
				DG.add_edge(fatherID, childID)
			motherID = row[self.motherColumnIndex]
			if motherID!='0':
				DG.add_edge(motherID, childID)
			counter += 1
		del reader
		sys.stderr.write("%s children, %s nodes. %s edges. %s connected components.\n"%(\
										len(childNodeSet), DG.number_of_nodes(), DG.number_of_edges(), \
										nx.number_connected_components(DG.to_undirected())))
		return PassingData(DG=DG, childNodeSet=childNodeSet)
def get_characteristics(G, filename):
  import networkx as nx
  print('calculating characteristics')

  n_nodes = nx.number_of_nodes(G)
  n_edges = nx.number_of_edges(G)
  n_components = nx.number_connected_components(G)
  print('number of nodes:', n_nodes)
  print('number of edges:', n_edges)
  print('number of components:', n_components)

  print('degree histogram')
  check_sum = 0.
  degree_hist = {}
  for node in G:
    if G.degree(node) not in degree_hist:
      degree_hist[G.degree(node)] = 1
    else:
      degree_hist[G.degree(node)] += 1

  keys = sorted(degree_hist.keys())
  for item in keys:
    print(item, degree_hist[item])
    check_sum += float(degree_hist[item]) / float(n_nodes)

  print("check sum: %f" % check_sum)

  #print('clustering coefficient')
  print('clustering coefficient of full network', nx.average_clustering(G))
  return 0
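The hand-rolled degree histogram above can be cross-checked against networkx's built-in helper:

import networkx as nx

G = nx.erdos_renyi_graph(50, 0.1, seed=1)
hist = nx.degree_histogram(G)  # hist[d] = number of nodes with degree d
for d, count in enumerate(hist):
    if count:
        print(d, count)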
	def constructPedigreeGraphFromPOEdgeFile(self, inputFname=None):
		"""
		2012.8.23
			inputFname is output of vervet/src/pedigree/DiscoverParentOffspringFromPlinkIBD.py
		"""
		sys.stderr.write("Constructing pedigree-graph out of %s ..."%(inputFname))
		DG=nx.DiGraph()
		reader = None
		childNodeSet = set()
		reader = MatrixFile(inputFname)
		reader.constructColName2IndexFromHeader()
		
		parentIDIndex = reader.getColIndexGivenColHeader("parentID")
		childIDIndex = reader.getColIndexGivenColHeader("childID")
		distToPOVectorIndex = reader.getColIndexGivenColHeader("distToPOVector")
		counter = 0
		for row in reader:
			childID = row[childIDIndex]
			childNodeSet.add(childID)
			parentID = row[parentIDIndex]
			distToPOVector = float(row[distToPOVectorIndex])
			DG.add_edge(parentID, childID, weight=distToPOVector)
			counter += 1
		del reader
		sys.stderr.write("%s children, %s nodes. %s edges. %s connected components.\n"%(\
										len(childNodeSet), DG.number_of_nodes(), DG.number_of_edges(), \
										nx.number_connected_components(DG.to_undirected())))
		return PassingData(DG=DG, childNodeSet=childNodeSet)
    def __init__(self, fname, interactive=True):
        self.fname = fname
        self.graph = nx.read_gpickle(fname)
        
        #apply_workaround(self.graph, thr=1e-3)
        #remove_intersecting_edges(self.graph)

        print "Number of connected components:", \
                nx.number_connected_components(self.graph)

        self.selected_path_verts = []
        
        if interactive:
            self.fig = plt.figure()
            self.path_patch = None
            
            G_p = self.graph.subgraph(max(nx.connected_components(self.graph), key=len))
            #G_p = nx.connected_component_subgraphs(prune_graph(self.graph))[0]

            plot.draw_leaf(G_p, fixed_width=True)

            plt.ion()
            plt.show()

            self.edit_loop()    
    def connected_components(self, date, graph):
        try:
            self.connected_component_dict[date] = nx.number_connected_components(graph)
        except:
            print('Connected Passed')
            raise
Example #30
def computeGenus(Data, gap=2**16):
    """    
    Main function to compute the genus given Data (a list of links: N x 2 contacting 
    pairs of genomic positions).    

    Creates a graph G to compute the number of connected components and the genus. 
    First consider the case of no coincident ends for loop origins and terminations:  

    Then each end in the link list would split into two, "real" (r) and "ghost" (g), 
    where the address of the ghost on the real line is greater than the address of the "real".

    Again, in the absence of coincident ends, for each link: 

    1. The left end's "real" node shares an edge with the right end's "ghost" node.

    2. The left end's "ghost" node shares an edge with the right end's "real" node, 
       exhausting the edges corresponding to links.

    3. Along the real line, only "ghost" nodes connect by edge to "real" nodes, 
       in linear order, and in consecutive pairing along the real line (backbone).

    4. Count the number of original loops (before creating ghosts); call it P. 

    5. Count the number of loops (connected components) in the real + ghost graph; 
       call it L. 

    6. genus :math:`g = (P - L)/2`  


    Now coming to resolving coincident ends in a manner that introduces no new 
    crossings and doesn't increase the genus: 

    1. Coincident ends (with n links originating or terminating) have to be 
       split into n real and n ghost nodes.

    2. This splitting has to be done in an order such that the splitting itself 
       does not create new link crossings. 

    Need to have a strategy for creating nodes such that points are easily ordered. 

    Strategy: 

    1. Index all original link ends (nodes of G) by large even integers. 

    2. Create ghosts on large odd numbers. 

    3. Introduce new real nodes for coincident points in between these large even 
       numbers.

    4. Ghosts' addresses are always s2 (here 1) greater than reals. 

    5. gap/s1 (s1 is an option in the splitAndModify() function) is the region within 
       which all coincident ends are resolved; increase it if there
       are too many coincident ends.

    *Args:*

        Data: 
            Nx2 link data
        gap:
            Gap between addresses of nodes corresponding to the ends of links 

    *Returns:*
        genus:
            computed genus 
        G: 
            networkx graph for computing genus
        LoopData:
            The list of edges corresponding to the mapping of links 
        backboneData:
            The list of edges corresponding to the mapping of connectivity along the 
            genome 
    """

    # clean up operations
    # step 1: order, left < right point along rows;
    # order rows by left and then by right, so that coincident points have an increasing right link
    Data = np.sort(
        np.asarray(Data),
        axis=1)  # sorted within each row so the left point is less than the right
    Data, indx = uniquerows(Data)  # clean-up operation; the genus computation uses
    # the length of Data, so all loops should be unique
    #print Data
    Data = Data[np.lexsort((
        Data[:, 1],
        Data[:,
             0]))]  # this sorts data by first column and then by second column

    G = nx.Graph()

    points = np.sort(np.unique(Data))  # unique points
    counter = dict.fromkeys(
        points, 0)  # counts the number of coincident points per position
    address = dict(zip(points, np.arange(
        0,
        len(points) * gap,
        gap)))  # initialize dict of addresses for the original points,
    # with the gapped index along the line
    #print address
    LoopData = []  # stores the data to plot in the p1 p2 format
    for p1, p2 in Data:  # loop data ordered left, right in chromosome-position order
        if p1 == p2:
            raise ValueError('Loop cannot be zero length')

        G, counter, LoopData = splitAndModify(p1,
                                              p2,
                                              G,
                                              address,
                                              counter,
                                              LoopData,
                                              gap=gap)

    # now run through the points in order of position and introduce the real-to-ghost backbone edges

    sorted_graph = sorted(G.nodes(data=True), key=lambda nd: nd[1]['pos'])
    sorted_names = np.asarray([n for n, dct in sorted_graph])
    # recall, sorted names are real-ghost pairs
    backbone_ghosts = sorted_names[1:-1:
                                   2]  # the first and the last points always
    # create a cluster together; sorted_names[1] is the first ghost
    backbone_reals = sorted_names[2:-1:2]
    # error handling: check that backbone_reals are all real; then all ghosts are ghosts
    test_kinds = np.asarray([G.nodes[n]['kind'] for n in backbone_reals],
                            dtype=str)
    #print backbone_reals
    #print backbone_ghosts
    #print sorted_names
    assert np.all(test_kinds == 'real'
                  ), 'Fatal Error, graph construction is wrong, change gap?'

    # these are guaranteed to be of equal length, but for sanity, throw an error otherwise
    assert len(backbone_ghosts) == len(
        backbone_reals), "Creation of ghosts was wrong"

    backboneData = []
    for p1, p2 in zip(backbone_ghosts, backbone_reals):
        G.add_edge(p1, p2)
        backboneData.append([p1, p2])

    genus = (len(Data) - (nx.number_connected_components(G) - 1)) // 2

    return genus, G, LoopData, backboneData
Example #31
    def execute(self, simplified=True):
        """

        :return:
        """
        # generate degree sequence
        self.__compute_degree_sequence()

        # generate community size dist
        exp_com_s = self.__compute_community_size_distribution()

        # assign node to community
        self.__node_to_community_initial_assignement(exp_com_s)

        # main loop (iteration)
        for self.it in tqdm.tqdm(range(0, self.iterations), ncols=100):

            # community check and event generation
            comp = nx.number_connected_components(self.graph)
            if comp <= len(self.communities):
                if self.__test_communities():
                    self.__generate_event(simplified)

            # node removal
            ar = random.random()
            if ar < self.del_node:
                self.__remove_node()

            # node addition
            ar = random.random()
            if ar < self.new_node:
                self.__add_node()

            # get nodes within communities that needs to adjust
            nodes = self.__get_nodes()

            # inner loop (nodes)
            for n in nodes:

                # discard deleted nodes
                if self.node_to_com[n] == -1:
                    continue

                # check for decayed edges
                removal = self.__get_vanished_edges(n)

                # removal phase
                for n1 in removal:
                    r = random.random()

                    # edge renewal phase
                    # check for intra/inter renewal thresholds
                    if r <= self.renewal and self.node_to_com[n1] == self.node_to_com[n]\
                            or r > self.renewal and self.node_to_com[n1] != self.node_to_com[n]:

                        # Exponential decay
                        timeout = (self.it + 1) + int(
                            random.expovariate(self.lambdad))
                        self.graph.adj[n][n1]["d"] = timeout

                    else:
                        # edge to be removed
                        self.out_interactions.write(
                            "%s\t%s\t-\t%s\t%s\n" %
                            (self.it, self.count, n, n1))
                        self.graph.remove_edge(n, n1)

                # expected degree reached
                if self.graph.degree(n) >= self.exp_node_degs[n]:
                    continue

                # decide if the node is active during this iteration
                action = random.random()

                # the node has not yet reached its expected degree and it acts in this round
                if self.graph.degree([n])[n] < self.exp_node_degs[n] and (
                        action <= self.paction or self.it == 0):

                    com_nodes = list(self.communities[self.node_to_com[n]])

                    # probability for intra/inter community edges
                    r = random.random()

                    # check if at least sigma% of the node's links are within the community
                    s = self.graph.subgraph(com_nodes)
                    d = s.degree([n])[n]  # Intra community edges

                    if r <= self.sigma and d < len(com_nodes) - 1:
                        self.__new_intra_community_edge(s, n)

                    # inter-community edges
                    elif r > self.sigma:
                        # if self.exp_node_degs[n]-d < (1-self.sigma) * s.number_of_nodes():
                        self.__new_inter_community_edge(n)

        self.__output_communities()
        self.out_events.write(
            "%s\n\t%s\n" % (self.iterations, self.performed_community_action))
        self.out_interactions.flush()
        self.out_interactions.close()
        self.out_events.flush()
        self.out_events.close()
        return self.stable
def get_basic_information(g):
    print("'Data Mining Labs' network has {} active members with {} connections between each other."\
          .format(g.number_of_nodes(), g.number_of_edges()))
    print('Number of connected components = {}'.format(
        nx.number_connected_components(g)))
def graph_exploration():
    DG = load_obj('DG_train')
    print(DG.number_of_nodes(), DG.number_of_edges(),
          nx.number_connected_components(DG.to_undirected()))
Example #34
        G.add_edge(int(head), int(tail))
        time[int(head)] = float(rtt)

    # get largest component and assign ping times to G0time dictionary
    Gcc = sorted(nx.connected_components(G), key=len, reverse=True)[0]
    G0 = G.subgraph(Gcc)
    G0.rtt = {}
    for n in G0:
        G0.rtt[n] = time[n]

    return G0


G = lanl_graph()

print(G)
print(nx.number_connected_components(G), "connected components")

plt.figure(figsize=(8, 8))
# use graphviz to find radial layout
pos = graphviz_layout(G, prog="twopi", root=0)
# draw nodes, coloring by rtt ping time
options = {"with_labels": False, "alpha": 0.5, "node_size": 15}
nx.draw(G, pos, node_color=[G.rtt[v] for v in G], **options)
# adjust the plot limits
xmax = 1.02 * max(xx for xx, yy in pos.values())
ymax = 1.02 * max(yy for xx, yy in pos.values())
plt.xlim(0, xmax)
plt.ylim(0, ymax)
plt.show()
Example #35
def report_on(g_dir, g_file):
    g = nx.read_graphml(os.path.join(g_dir, g_file))
    print('%s,%d,%d,%d' %
          (utils.extract_filename(g_file), g.number_of_nodes(),
           g.number_of_edges(), nx.number_connected_components(g)))
 def _is_valid(self, i, j, k, l, graph):
     g = graph.copy()
     g.remove_edge(i, j)
     g.remove_edge(k, l)
     return nx.number_connected_components(g) == 2
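_is_valid performs the same check as this inline sketch: on a cycle, removing any two distinct edges leaves exactly two components:

import networkx as nx

g = nx.cycle_graph(6)
g.remove_edge(0, 1)
g.remove_edge(3, 4)
print(nx.number_connected_components(g) == 2)  # True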
Example #37
def _create_sorted_contour(yxcontour, center=None, rot=0, N=100, display=False):
	try:
		
		cc = [[1,0],[1,1],[0,1],[-1,1],[-1,0],[-1,-1],[0,-1],[1,-1]]
		
		# nearest neighbor graph
		import networkx as nx
		dist = cdist(yxcontour,yxcontour)	
		dist[dist >= 2] = 0
		G = nx.from_numpy_matrix(dist)
				
		# in rare cases there can be several subgraphs; the largest is then selected
		if nx.number_connected_components(G) > 1:
			GG = [G.subgraph(c) for c in nx.connected_components(G)]
			G = GG[np.argmax([nx.number_of_nodes(g) for g in GG])]
		

		# Let's choose n1->n2, the first edge of the contour
		# n1 should be in G and the most south-left point 

		Gnodes = np.array(G.nodes())

		miny_idx = np.where(yxcontour[Gnodes,0] == np.min(yxcontour[Gnodes,0]))[0]
		n1 = Gnodes[miny_idx][np.argmin(yxcontour[Gnodes][miny_idx,1])]
		
		# n2 is the first neighbor of n1 after direct rotation from [-1,-1], to ensure tracing the outside contour
		CC = [list(a) for a in np.roll(cc, -cc.index([-1,-1]), axis=0)[1:]]
		V = [list(yxcontour[ni] - yxcontour[n1]) for ni in G.neighbors(n1)]
		n2 = list(G.neighbors(n1))[np.argmin([CC.index(vi) for vi in V])]

		nfirst = n1
		nsec   = n2
		nnnyx  = yxcontour[nfirst] - 1
		
		G2 = nx.DiGraph() # directed graph
		G2.add_node(n1)
		G2.add_node(n2)
		G2.add_edge(n1,n2)
		while(n2 != nfirst): 
			neigh = list(G.neighbors(n2))
			neigh.remove(n1)
			
			if len(neigh) == 0:
				tmp=n2
				n2=n1
				n1=tmp
			elif len(neigh) == 1:
				n1=n2
				n2=neigh[0]
			else:
				v = list(yxcontour[n1] - yxcontour[n2])
				CC = [list(a) for a in np.roll(cc, -cc.index(v), axis=0)[1:]]
				V = [list(yxcontour[ni] - yxcontour[n2]) for ni in neigh]
				n1 = n2
				n2 = neigh[np.argmin([CC.index(vi) for vi in V])]
			G2.add_node(n1)
			G2.add_node(n2)
			G2.add_edge(n1,n2)

		# in rare cases there can be more than one cycle (e.g. when two cycles are connected by a single node); the largest is kept
		cycles = tuple(nx.simple_cycles(G2))
		G3 = nx.DiGraph()
		nx.add_cycle(G3, cycles[np.argmax([len(cy) for cy in cycles])])
		
		# Selection of nstart in G3: the closest angle to rot given the center		
		rot = convert_angle(rot) # to make sure it is between -pi and +pi
		G3nodes = np.array(G3.nodes())
		trcontour = np.vstack(cart2pol(yxcontour[G3nodes,1] - center[1], yxcontour[G3nodes,0] - center[0])).T
		idxSortedcontour = np.argsort(trcontour[:,0])
		_idx = np.searchsorted(trcontour[:,0], rot, sorter=idxSortedcontour)
		nstart = G3nodes[idxSortedcontour][_idx if _idx<idxSortedcontour.shape[0] else 0]

		path  = np.array(nx.find_cycle(G3, source=nstart)) # contour starting from the rotated init
		pathd = [dist[u,v] for (u,v) in path]
		cumsum = np.copy(pathd)
		
		for k in range(1,cumsum.shape[0]):
			cumsum[k] += cumsum[k-1]
		
		short_contour_idx = path[:,0][np.searchsorted(cumsum, np.arange(N)*cumsum[-1]/N)]

		if display:
			import matplotlib.pyplot as plt
			fig = plt.figure()
			ax = fig.add_subplot(1, 1, 1)

			G4 = nx.Graph()
			nodes = list(range(short_contour_idx.shape[0]))
			G4.add_nodes_from(nodes)
			G4.add_edges_from(np.array((nodes,np.roll(nodes, 1))).T)
			
			nx.draw(G, yxcontour, node_size=20, node_color ='k', edge_color='k')
			nx.draw(G3, yxcontour, node_size=20, node_color ='y', edge_color='y', with_labels=True)
			nx.draw(G4, yxcontour[short_contour_idx], node_size=20, node_color ='g', edge_color='g')
			plt.plot(yxcontour[0,0], yxcontour[0,1],'ob')
			plt.plot(yxcontour[nfirst,0], yxcontour[nfirst,1],'dg',ms=20)
			plt.plot(yxcontour[nsec,0], yxcontour[nsec,1],'Dg',ms=20)
			plt.plot(nnnyx[0], nnnyx[1],'or',ms=20)
			plt.plot(yxcontour[path[0,0],0], yxcontour[path[0,0],1],'or')
			plt.axis('equal')
			plt.show()
			plt.close()
			import pdb
			pdb.set_trace()

		return yxcontour[short_contour_idx]
	except nx.NetworkXError as e:
		print('utils.py - _create_sorted_contour() - nx.NetworkXError: %s' % e)
		return None
	except ValueError as e:
		print('utils.py - _create_sorted_contour() - ValueError : %s' % e)
		import pdb
		pdb.set_trace()
		return None
	except IndexError as e: 
		print('utils.py - _create_sorted_contour() - IndexError : %s' % e)
		return None
	except MemoryError as e: 
		print('utils.py - _create_sorted_contour() - MemoryError : %s' % e)
		import pdb
		pdb.set_trace()
		return None
Example #38
def main():
    # Directed Bison Network
    bison_file = 'moreno_bison/out.moreno_bison_bison'
    bison_graph = nx.DiGraph()

    create_network(bison_graph, bison_file, True)

    # Undirected Kangaroo Network
    kangaroo_file = 'moreno_kangaroo/out.moreno_kangaroo_kangaroo'
    kangaroo_graph = nx.Graph()

    create_network(kangaroo_graph, kangaroo_file, False)

    # Part A: Connected Component Analysis
    # Connected Component Analysis of Bison Directed Graph
    print("PART A:\n")
    print("Bison Directed Graph Connected Component Analysis",
          "\nWeakly connected: ", nx.is_weakly_connected(bison_graph),
          "\nNumber of Weakly CCs: ",
          nx.number_weakly_connected_components(bison_graph),
          "\nSize of largest CC: ",
          len(max(nx.weakly_connected_components(bison_graph),
                  key=len)), "\nSize of smallest CC: ",
          len(min(nx.weakly_connected_components(bison_graph), key=len)))

    # Connected Component Analysis of Kangaroo Undirected Graph
    print("\nKangaroo Undirected Graph Connected Component Analysis",
          "\nConnected: ", nx.is_connected(kangaroo_graph),
          "\nNumber of CCs: ", nx.number_connected_components(kangaroo_graph),
          "\nSize of largest CC: ",
          len(max(nx.connected_components(kangaroo_graph),
                  key=len)), "\nSize of smallest CC: ",
          len(min(nx.connected_components(kangaroo_graph), key=len)))

    # Part B Computing Degrees and finding the Probability distribution
    # Creation of an arrayList to store the degree for each node of Bison Network
    bison_degrees = []
    for node in range(1, 26):
        bison_degrees.append(bison_graph.degree(node))

    # Computing Mean and Standard Deviation for Directed
    x_label = stats(bison_degrees)

    # Creating a Histogram to plot the data of the degrees Bison Network
    plt.figure(3)
    plt.title('Part B: Histogram Directed Bison')
    plt.xlabel(x_label)
    plt.hist(bison_degrees, bins='auto')

    # Creation of an arrayList to store the degree for each node of Kangaroo Network
    kangaroo_degrees = []
    for node in range(1, 17):
        kangaroo_degrees.append(kangaroo_graph.degree(node))

    # Computing Mean and Standard Deviation for Undirected
    x_label = stats(kangaroo_degrees)

    # Creating a Histogram to plot the data of the degrees for Kangaroo Network
    plt.figure(4)
    plt.title('Part B: Histogram Undirected Kangaroo')
    plt.xlabel(x_label)
    plt.hist(kangaroo_degrees, bins='auto')
    # lt.show()

    # Part C Find the Path between 2 arbitrary vertices in the largest CC
    # Creating two arbitrary nodes making sure they aren't the same number
    node1 = random.randrange(1, 27, 1)
    node2 = random.randrange(1, 27, 1)
    while node1 == node2:
        node1 = random.randrange(1, 27, 1)

    # I put a cutoff on the list of simple paths for now so I can at least run something
    # the cutoff restricts the search to paths of length <= 5
    # This section creates a list of all simple paths and then creates a list with the lengths of these paths
    bison_paths = list(nx.all_simple_paths(bison_graph, node1, node2,
                                           cutoff=5))
    bison_p_lengths = []

    for node in range(0, len(bison_paths) - 1):
        bison_p_lengths.append(len(bison_paths[node]))

    x_label = stats(bison_p_lengths)

    # Creating a histogram for the degrees of the graph
    plt.figure(5)
    plt.title('Part C: Histogram Directed Bison Paths')
    plt.xlabel(x_label)
    plt.hist(bison_p_lengths, bins='auto')
    # plt.show()

    # Creating two arbitrary nodes making sure they aren't the same number
    node1 = random.randrange(1, 17, 1)
    node2 = random.randrange(1, 17, 1)
    while node1 == node2:
        node1 = random.randrange(1, 17, 1)

    # This section creates a list of all simple paths and then creates a list with the lengths of these paths
    kangaroo_paths = list(
        nx.all_simple_paths(kangaroo_graph, node1, node2, cutoff=5))
    kangaroo_p_lengths = []

    for node in range(0, len(kangaroo_paths) - 1):
        kangaroo_p_lengths.append(len(kangaroo_paths[node]))

    x_label = stats(kangaroo_p_lengths)

    # Creating a histogram for the degrees of the graph
    plt.figure(6)
    plt.title('Part C: Histogram Undirected Kangaroo Paths')
    plt.xlabel(x_label)
    plt.hist(kangaroo_p_lengths, bins='auto')

    # plt.show()

    # Part D Find the Simple Circuits between 2 arbitrary vertices in the largest CC
    # UNABLE TO RUN BISON CIRCUITS ON LAPTOP: THERE ARE TOO MANY AND I CANNOT CREATE A CUTOFF

    # Creates a list of simple cycles and then creates another list of the lengths of the cycles
    # bison_circuits = list(nx.simple_cycles(bison_graph))
    # bison_c_lengths = []
    # for node in range(0,len(bison_circuits)-1):
    #    bison_c_lengths.append(len(bison_circuits[node]))
    #
    # x_label = stats(bison_c_lengths)
    #
    # plt.figure(7)
    # plt.title('PART D: Histogram Directed Bison Circuits')
    # plt.xlabel(x_label)
    # plt.hist(bison_c_lengths, bins = 'auto')

    # You can't use the simple cycle function for undirected graphs so I used the basis function.
    # Creates a list of simple cycles and then creates another list of the lengths of the cycles
    kangaroo_circuits = nx.cycle_basis(kangaroo_graph)
    kangaroo_c_lengths = []
    for node in range(0, len(kangaroo_circuits) - 1):
        kangaroo_c_lengths.append(len(kangaroo_circuits[node]))

    x_label = stats(kangaroo_c_lengths)
    plt.figure(7)
    plt.title('PART D: Histogram Undirected Kangaroo Circuits')
    plt.xlabel(x_label)
    plt.hist(kangaroo_c_lengths, bins='auto')
    # plt.show()

    # Part E Check if Eulerian, Find a Eulerian Path
    print("\nPART E:")
    print("\nDirected Bison Graph")
    print("Euelerian: ", nx.is_eulerian(bison_graph))
    print("Has a Eulerian Path: ", nx.has_eulerian_path(bison_graph))

    print("\nUndirected Kangaroo Graph")
    print("Euelerian: ", nx.is_eulerian(kangaroo_graph))
    print("Has a Eulerian Path: ", nx.has_eulerian_path(kangaroo_graph))

    # Part F: Convert to Matrix.
    # I don't know if this covers everything?
    bison_matrix = nx.to_numpy_matrix(bison_graph)
    plt.matshow(bison_matrix)
    # plt.show()

    kangaroo_matrix = nx.to_numpy_matrix(kangaroo_graph)
    plt.matshow(kangaroo_matrix)
    # plt.show()

    # Part G: Copy Largest CC comparing it to a copy and a slightly different CC
    print("\nPart G:\n")
    # copying the largest connected component from the Bison Directed graph
    bison_n1 = nx.Graph()
    largest_cc_bison = list(
        max(nx.weakly_connected_components(bison_graph), key=len))
    for i in largest_cc_bison:
        bison_n1.add_edge(i, i + 1)
    bison_n2 = bison_n1.copy()

    # Checking Equivalence between copied graphs
    print("Is bison_n1 Equivalent to bison_n2?")
    compare(bison_n1, bison_n2)

    # Checking Equivalence between copied graphs but one has an extra 10 edges
    print("\nIs bison_n1 Equivalent to N3?")
    bison_n3 = bison_n2.copy()
    add_10_edges(bison_n3, len(bison_n3))
    compare(bison_n1, bison_n3)

    # Repeat for Kangaroo Undirected Network
    kangaroo_n1 = nx.Graph()
    largest_cc_kangaroo = list(
        max(nx.connected_components(kangaroo_graph), key=len))
    for i in largest_cc_kangaroo:
        kangaroo_n1.add_edge(i, i + 1)
    kangaroo_n2 = kangaroo_n1.copy()

    print("\nIs kangaroo_n1 Equivalent to kangaroo_n2?")
    compare(kangaroo_n1, kangaroo_n2)

    print("\nIs kangaroo_n1 Equivalent to N3?")
    kangaroo_n3 = kangaroo_n2.copy()
    add_10_edges(kangaroo_n3, len(kangaroo_n3))
    compare(kangaroo_n1, kangaroo_n3)

    # Part H: Generate Minimum Spanning Tree
    print("\nPart H:\n")
    # Cannot generate SPanning tree for Directed networks
    # Generating a minimum spanning tree for Undirected network
    kangaroo_min_tree = nx.minimum_spanning_tree(kangaroo_graph)
    print(
        "~A Minimum Spanning Tree was created for the Undirected Kangaroo Graph~"
    )
    tree_or_forest(kangaroo_min_tree)

    # Finding two random nodes that are connected
    x = 0
    y = 0
    while (not (kangaroo_min_tree.has_edge(x, y))):
        x = random.randrange(1, 17, 1)
        y = random.randrange(1, 17, 1)
        while x == y:
            x = random.randrange(1, 17, 1)

    # Removing the found edge
    print("\nAn edge from the spanning tree was removed")
    kangaroo_min_tree.remove_edge(x, y)
    tree_or_forest(kangaroo_min_tree)

    # Part I: Dijkstra's Algorithm

    bison_pairs = list(nx.all_pairs_node_connectivity(bison_graph))
    connected_nodes = []
    for i in bison_pairs:
        for j in bison_pairs:
            if bison_graph.has_edge(i, j + 1):
                connected_nodes.append([i, j + 1])

    dijkstra_paths = []
    length = len(connected_nodes)
    for i in range(0, length - 1):
        for j in range(0, 1):
            dijkstra_paths.append(
                int(
                    nx.dijkstra_path_length(bison_graph, connected_nodes[i][j],
                                            connected_nodes[i][j + 1])))

    x_label = stats(dijkstra_paths)

    plt.figure()
    plt.xlabel(x_label)
    plt.title('Directed Bison Dijkstra Path Lengths')
    plt.hist(dijkstra_paths)
    # plt.show()

    #Created a new temporary graph with edges from the connected nodes and weights from the distance list
    temp_bison = nx.DiGraph()
    for i in range(0, length - 1):
        j = 0
        temp_bison.add_edge(connected_nodes[i][j],
                            connected_nodes[i][j + 1],
                            weight=dijkstra_paths[i])

    # I dont really know if this creates a matrix for the weigths this is just what i did in a previous part
    bison_distance_matrix = nx.to_numpy_matrix(temp_bison)
    plt.matshow(bison_distance_matrix)
    plt.show()

    # Repeat for Kangaroo Undirected

    kangaroo_pairs = list(nx.all_pairs_node_connectivity(kangaroo_graph))
    connected_nodes_k = []
    for i in kangaroo_pairs:
        for j in kangaroo_pairs:
            if kangaroo_graph.has_edge(i, j + 1):
                connected_nodes_k.append([i, j + 1])

    dijkstra_paths_k = []
    length = len(connected_nodes_k)
    for i in range(0, length):
        dijkstra_paths_k.append(
            int(
                nx.dijkstra_path_length(kangaroo_graph, connected_nodes_k[i][0],
                                        connected_nodes_k[i][1])))
    x_label = stats(dijkstra_paths_k)

    plt.figure()
    plt.xlabel(x_label)
    plt.title('Undirected Kangaroo Dijkstra Path Lengths')
    plt.hist(dijkstra_paths_k)
    plt.show()

    temp_kangaroo = nx.Graph()
    for i in range(0, length - 1):
        j = 0
        temp_kangaroo.add_edge(connected_nodes_k[i][j],
                               connected_nodes_k[i][j + 1],
                               weight=dijkstra_paths_k[i])

    kangaroo_distance_matrix = nx.to_numpy_matrix(temp_kangaroo)
    plt.matshow(kangaroo_distance_matrix)
    plt.show()
def compute_plotmountains(G, sname):
    # print graphname
    orig_core_nums = nx.core_number(G)
    print(G.number_of_nodes(), G.number_of_edges(),
          max(orig_core_nums.values()))
    print('core nos computed')

    # Initializing node_CNdrops_mountainassignment
    # 'node_CNdrops_mountainassignment' is a dict where keys are nodeIDS
    # Each value is tuple of the maximum drop in core number observed for this node and the mountain to which it is assigned.
    node_CNdrops_mountainassignment = {}
    for n in G.nodes():
        node_CNdrops_mountainassignment[n] = [
            0, -1
        ]  #diff in core number, assignment to a mountain

    H = G.copy()
    H_nodes = set(G.nodes())

    current_core_nums = orig_core_nums.copy()
    current_d = max(current_core_nums.values())
    print('current_d = ', current_d)

    # 'current_plotmountain_id' keeps track of numbering of the plot-mountains
    current_plotmountain_id = 0
    peak_numbers = {}

    # Each iteration of the while loop finds a k-contour
    while (len(H.nodes()) > 0):

        # degen_core is the degeneracy of the graph
        degen_core = nx.k_core(H)  # Degen-core

        # Note that the actual mountains may consist of multiple components.
        # To compute their core-periphery values or to analyze each component,
        # use the following line to find the components
        res_core_comps = (degen_core.subgraph(c) for c in
                          nx.connected_components(degen_core))  # the comps in Degen-core
        print('components', nx.number_connected_components(degen_core),
              len(degen_core))
        # But in the mountain plot we plot the separate components related to a k-contour as a single mountain.
        # So, ignore the components for making mountain plots

        # Nodes in the k-contour. Their current core number is their peak number.
        for comp in res_core_comps:
            #kcontour_nodes = degen_core.nodes()
            kcontour_nodes = comp.nodes()
            for n in kcontour_nodes:
                peak_numbers[n] = current_core_nums[n]

            # Removing the kcontour (i.e. degeneracy) and re-computing core numbers.
            H_nodes = H_nodes.difference(set(kcontour_nodes))
            H = G.subgraph(list(H_nodes))
            new_core_nums = nx.core_number(H)

            for n in kcontour_nodes:
                # For the nodes in the kcontour, removal causes their core number to drop to 0.
                # Check if this drop is greater than the drop in core number observed for these nodes in previous iterations
                if current_core_nums[n] - 0 > node_CNdrops_mountainassignment[
                        n][0]:
                    node_CNdrops_mountainassignment[n][0] = current_core_nums[
                        n]
                    node_CNdrops_mountainassignment[n][
                        1] = current_plotmountain_id

            for n in new_core_nums:
                # Check if this drop is greater than the drop in core number observed for these nodes in previous iterations
                if current_core_nums[n] - new_core_nums[
                        n] > node_CNdrops_mountainassignment[n][0]:
                    node_CNdrops_mountainassignment[n][
                        0] = current_core_nums[n] - new_core_nums[n]
                    node_CNdrops_mountainassignment[n][
                        1] = current_plotmountain_id

            current_plotmountain_id += 1
            current_core_nums = new_core_nums.copy()

    print 'peak nos computed'

    plot_mountains(node_CNdrops_mountainassignment, orig_core_nums,
                   peak_numbers, G, sname)
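
# --- Added sketch (not part of the original example): for intuition on the
# peeling step above, a minimal self-contained demo of nx.core_number and
# nx.k_core on a toy graph. The k-contour is the degeneracy core; removing it
# lowers the core numbers of the remaining nodes, and those drops are what
# compute_plotmountains records.
import networkx as nx

toy = nx.complete_graph(4)            # nodes 0..3 form a 4-clique, core number 3
toy.add_edges_from([(3, 4), (4, 5)])  # pendant path, core number 1

print(nx.core_number(toy))            # {0: 3, 1: 3, 2: 3, 3: 3, 4: 1, 5: 1}
contour = nx.k_core(toy)              # degeneracy core = the 4-clique
remainder = toy.subgraph(set(toy) - set(contour))
print(nx.core_number(remainder))      # {4: 1, 5: 1}: drops recorded per node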
def mask_test_edges(adj,
                    test_frac=.1,
                    val_frac=.05,
                    prevent_disconnect=True,
                    verbose=False):
    # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    g = nx.from_scipy_sparse_matrix(adj)
    orig_num_cc = nx.number_connected_components(g)

    adj_triu = sp.triu(adj)  # upper triangular portion of adj matrix
    adj_tuple = sparse_to_tuple(
        adj_triu)  # (coords, values, shape), edges only 1 way
    edges = adj_tuple[0]  # all edges, listed only once (not 2 ways)
    # edges_all = sparse_to_tuple(adj)[0] # ALL edges (includes both ways)
    num_test = int(
        np.floor(edges.shape[0] *
                 test_frac))  # controls how large the test set should be
    num_val = int(
        np.floor(edges.shape[0] *
                 val_frac))  # controls how large the validation set should be

    # Store edges in list of ordered tuples (node1, node2) where node1 < node2
    edge_tuples = [(min(edge[0], edge[1]), max(edge[0], edge[1]))
                   for edge in edges]
    all_edge_tuples = set(edge_tuples)
    train_edges = set(edge_tuples)  # initialize train_edges to have all edges
    test_edges = set()
    val_edges = set()

    # Iterate over shuffled edges, add to train/val sets
    np.random.shuffle(edge_tuples)
    counter = 0
    for edge in edge_tuples:
        counter += 1
        if counter % 100 == 0:
            print("processed:" + str(counter))
        # print edge
        node1 = edge[0]
        node2 = edge[1]

        # If removing edge would disconnect a connected component, backtrack and move on
        g.remove_edge(node1, node2)
        if prevent_disconnect == True:
            if nx.number_connected_components(g) > orig_num_cc:
                g.add_edge(node1, node2)
                continue

        # Fill test_edges first
        if len(test_edges) < num_test:
            test_edges.add(edge)
            train_edges.remove(edge)

        # Then, fill val_edges
        elif len(val_edges) < num_val:
            val_edges.add(edge)
            train_edges.remove(edge)

        # Both edge lists full --> restore this last removed edge and break
        elif len(test_edges) == num_test and len(val_edges) == num_val:
            g.add_edge(node1, node2)  # keep g consistent with train_edges
            break

    if (len(val_edges) < num_val or len(test_edges) < num_test):
        print(
            "WARNING: not enough removable edges to perform full train-test split!"
        )
        print("Num. (test, val) edges requested: (", num_test, ", ", num_val,
              ")")
        print("Num. (test, val) edges returned: (", len(test_edges), ", ",
              len(val_edges), ")")

    if prevent_disconnect == True:
        assert nx.number_connected_components(g) == orig_num_cc

    if verbose == True:
        print('creating false test edges...')

    test_edges_false = set()
    while len(test_edges_false) < num_test:
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue

        false_edge = (min(idx_i, idx_j), max(idx_i, idx_j))

        # Make sure false_edge is not an actual edge, and not a repeat
        if false_edge in all_edge_tuples:
            continue
        if false_edge in test_edges_false:
            continue

        test_edges_false.add(false_edge)

    if verbose == True:
        print('creating false val edges...')

    val_edges_false = set()
    while len(val_edges_false) < num_val:
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue

        false_edge = (min(idx_i, idx_j), max(idx_i, idx_j))

        # Make sure false_edge is not an actual edge, not in test_edges_false, and not a repeat
        if false_edge in all_edge_tuples or false_edge in test_edges_false or false_edge in val_edges_false:
            continue

        val_edges_false.add(false_edge)

    if verbose == True:
        print('creating false train edges...')

    train_edges_false = set()
    while len(train_edges_false) < len(train_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue

        false_edge = (min(idx_i, idx_j), max(idx_i, idx_j))

        # Make sure false_edge is not an actual edge, not in test_edges_false,
        # not in val_edges_false, and not a repeat
        if false_edge in all_edge_tuples or false_edge in test_edges_false or false_edge in val_edges_false or false_edge in train_edges_false:
            continue

        train_edges_false.add(false_edge)

    if verbose == True:
        print('final checks for disjointness...')

    # assert: false_edges are actually false (not in all_edge_tuples)
    assert test_edges_false.isdisjoint(all_edge_tuples)
    assert val_edges_false.isdisjoint(all_edge_tuples)
    assert train_edges_false.isdisjoint(all_edge_tuples)

    # assert: test, val, train false edges disjoint
    assert test_edges_false.isdisjoint(val_edges_false)
    assert test_edges_false.isdisjoint(train_edges_false)
    assert val_edges_false.isdisjoint(train_edges_false)

    # assert: test, val, train positive edges disjoint
    assert val_edges.isdisjoint(train_edges)
    assert test_edges.isdisjoint(train_edges)
    assert val_edges.isdisjoint(test_edges)

    if verbose == True:
        print('creating adj_train...')

    # Re-build adj matrix using remaining graph
    adj_train = nx.adjacency_matrix(g)

    # Convert edge-lists to numpy arrays
    train_edges = np.array([list(edge_tuple) for edge_tuple in train_edges])
    train_edges_false = np.array(
        [list(edge_tuple) for edge_tuple in train_edges_false])
    val_edges = np.array([list(edge_tuple) for edge_tuple in val_edges])
    val_edges_false = np.array(
        [list(edge_tuple) for edge_tuple in val_edges_false])
    test_edges = np.array([list(edge_tuple) for edge_tuple in test_edges])
    test_edges_false = np.array(
        [list(edge_tuple) for edge_tuple in test_edges_false])

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false
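
# --- Added usage sketch (not part of the original): mask_test_edges also needs
# a sparse_to_tuple helper (not shown here) plus the nx/np/sp imports used
# above. Assuming those are in scope, it can be exercised on a small built-in
# graph like this.
import networkx as nx

adj = nx.adjacency_matrix(nx.karate_club_graph())
splits = mask_test_edges(adj, test_frac=.1, val_frac=.05, verbose=True)
adj_train, train_e, train_f, val_e, val_f, test_e, test_f = splits
print(adj_train.shape, test_e.shape, val_e.shape)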
Exemple #41
import networkx
from networkx.drawing.nx_pydot import write_dot


with open('day12-input.txt') as f:
    lines = f.readlines()

    graph = networkx.Graph()

    for line in lines:
        line = str.replace(line, ' ', '')
        node, neighbours = line.strip().split('<->')
        for neighbour in neighbours.split(','):
            graph.add_edge(node, neighbour)

    write_dot(graph, 'day12-graph.dot')
    print(len(networkx.node_connected_component(graph, '0')))
    print(networkx.number_connected_components(graph))
Exemple #42
def words_graph():
    """Return the words example graph from the Stanford GraphBase"""
    fh = gzip.open('words4_dat.txt.gz', 'r')
    words = set()
    for line in fh.readlines():
        line = line.decode()
        if line.startswith('*'):
            continue
        w = str(line[0:4])
        words.add(w)
    return generate_graph(words)
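
# --- Added sketch (not part of the original snippet): generate_graph is called
# above but its definition was not included. Following the Stanford GraphBase
# words example, a plausible implementation connects two words iff they differ
# in exactly one letter.
import networkx as nx
from string import ascii_lowercase

def generate_graph(words):
    G = nx.Graph(name="words")
    G.add_nodes_from(words)
    for w in words:
        # generate every one-letter variant and keep those in the word set
        for i in range(len(w)):
            for c in ascii_lowercase:
                cand = w[:i] + c + w[i + 1:]
                if cand != w and cand in words:
                    G.add_edge(w, cand)
    return G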


if __name__ == '__main__':
    G = words_graph()
    print("Loaded words_dat.txt containing 5757 five-letter English words.")
    print("Two words are connected if they differ in one letter.")
    print("Graph has %d nodes with %d edges"
          % (nx.number_of_nodes(G), nx.number_of_edges(G)))
    print("%d connected components" % nx.number_connected_components(G))

    for (source, target) in [('cold', 'warm'),
                             ('love', 'hate')]:
        print("Shortest path between %s and %s is" % (source, target))
        try:
            sp = nx.shortest_path(G, source, target)
            for n in sp:
                print(n)
        except nx.NetworkXNoPath:
            print("None")
# libraries
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

data = pd.read_csv('../data/edges_with_weight/edges_with_weight.csv', ';')

print(data)

# Build your graph. Note that we use the DiGraph function to create the graph!
G = nx.from_pandas_edgelist(data,
                            'src',
                            'dest',
                            create_using=nx.DiGraph(),
                            edge_attr='weight')

#plt.figure(1, figsize=(10,10))

# Make the graph
#nx.draw(G, with_labels=False, node_size=100, alpha=0.6, arrows=True, font_size=8, pos=nx.kamada_kawai_layout(G))

#plt.show()

G = nx.from_pandas_edgelist(data,
                            'src',
                            'dest',
                            create_using=nx.Graph(),
                            edge_attr='weight')
print(nx.number_connected_components(G))
Exemple #44
def graphInfo(graph,
              weighted=2,
              path_lengths=False,
              nodeInfo=False,
              edgeInfo=False):
    """
	Give a Basic Analysis of the Graph
	weighted = {0:"only unweighted",1:"only weighted",else:"both weighted and unweighted"}
	path_lengths = {True:""}
	"""
    graph_info = {}

    nfnodes = graph.number_of_nodes()
    nfedges = graph.number_of_edges()
    nfComponents = nx.number_connected_components(graph)
    density = nx.density(graph)

    graph_info = {
        "nfnodes": nfnodes,
        "nfedges": nfedges,
        "nfComponents": nfComponents,
        "density": density
    }

    if weighted == 0:
        unweighted_size = graph.size(weight=None)
        graph_info['unweighted_size'] = unweighted_size
    elif weighted == 1:
        weighted_size = graph.size(weight="weight")
        graph_info['weighted_size'] = weighted_size
    else:
        unweighted_size = graph.size(weight=None)
        weighted_size = graph.size(weight="weight")
        graph_info['unweighted_size'] = unweighted_size
        graph_info['weighted_size'] = weighted_size

    max_unweighted_node_degree = 0
    max_weighted_node_degree = 0

    if weighted == 0 and nodeInfo == True:
        sorted_nodes_by_unweighted_degree = sortNodesByDegree(graph,
                                                              weight=None,
                                                              reverse=True)
        if nfnodes >= 2:
            max_unweighted_node_degree = sorted_nodes_by_unweighted_degree[0]
            graph_info[
                'max_unweighted_node_degree'] = max_unweighted_node_degree

    elif weighted == 1:

        if nfedges > 1 and edgeInfo == True:
            sorted_edges = sortEdgesByWeight(graph)
            max_edge_weight = None
            max_edge_weight = sorted_edges[0]
            graph_info['max_edge_weight'] = max_edge_weight

        if nfnodes >= 2 and nodeInfo == True:
            sorted_nodes_by_weighted_degree = sortNodesByDegree(
                graph, weight="weight", reverse=True)
            max_weighted_node_degree = sorted_nodes_by_weighted_degree[0]
            graph_info['max_weighted_node_degree'] = max_weighted_node_degree

    elif weighted == 2:
        sorted_edges = sortEdgesByWeight(graph)
        sorted_nodes_by_weighted_degree = sortNodesByDegree(graph,
                                                            weight="weight",
                                                            reverse=True)
        sorted_nodes_by_unweighted_degree = sortNodesByDegree(graph,
                                                              weight=None,
                                                              reverse=True)
        max_edge_weight = None

        if nfedges > 1 and edgeInfo == True:
            max_edge_weight = sorted_edges[0]
            graph_info['max_edge_weight'] = max_edge_weight

        if nfnodes >= 2 and nodeInfo == True:
            max_unweighted_node_degree = sorted_nodes_by_unweighted_degree[0]
            max_weighted_node_degree = sorted_nodes_by_weighted_degree[0]
            graph_info[
                'max_unweighted_node_degree'] = max_unweighted_node_degree
            graph_info['max_weighted_node_degree'] = max_weighted_node_degree

    weighted_avg_path_length = math.inf
    unweighted_avg_path_length = math.inf

    if nfComponents == 1 and path_lengths == True:

        weighted_avg_path_length = nx.average_shortest_path_length(
            graph, weight="weight")
        unweighted_avg_path_length = nx.average_shortest_path_length(
            graph, weight=None)
        graph_info['weighted_avg_path_length'] = weighted_avg_path_length
        graph_info['unweighted_avg_path_length'] = unweighted_avg_path_length

    return graph_info
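
# --- Added usage sketch (not part of the original): graphInfo relies on helper
# functions sortNodesByDegree / sortEdgesByWeight and on `import math`, none of
# which are shown here. With weighted=0 and the default flags those helpers are
# never reached, so a minimal call looks like:
import math
import networkx as nx

G = nx.les_miserables_graph()  # small built-in weighted graph, single component
print(graphInfo(G, weighted=0, path_lengths=True))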
Exemple #45
        self.trainNodes = np.arange(min(testIdx))
        self.trainNodesWithLabel = np.arange(len(x))


if __name__ == "__main__":
    cora = dataGenerator("cora")
    graph = nx.Graph()
    graph.add_node(1)
    graph.add_node(2)
    graph.add_node(7)
    graph.add_node(4)
    graph.add_node(5)

    graph.add_edge(1, 2)
    graph.add_edge(1, 4)
    graph.add_edge(1, 7)
    graph.add_edge(1, 5)
    graph.add_edge(4, 7)
    graph.add_edge(4, 5)

    graph.add_node(6)
    graph.add_node(8)
    graph.add_edge(6, 8)

    print(nx.number_connected_components(graph))

    graph = graph.subgraph([1, 2, 4, 5])
    nx.draw(graph, with_labels=True, font_weight='bold')
    # nx.draw_shell(graph)
    plt.show()
Exemple #46
def main(argv):
	" main function"	
	global g_name,d_component,n,k,T_max,svd_routine,svd_parameter,seed,max_n,J0,J2,bias,beta,dynamic_type;
	H=nx.Graph();
	try:
		opts, args=getopt.getopt(argv,"hg:d:n:k:t:s:p:i:m:J:j:b:B:e:","help");
	except getopt.GetoptError:
		tl_main.usage();
		sys.exit(2);	
	for opt,arg in opts:
	 	if opt in("-h","--help"):
	 		tl_main.usage();
	 		sys.exit();
	 	elif opt=='-d': d_component=int(arg);
	 	elif opt=='-n': n=int(arg);
	 	elif opt=='-k': k=int(arg);
	 	elif opt=='-t': T_max=int(arg);
	 	elif opt=='-s': svd_routine=int(arg);
	 	elif opt=='-p': svd_parameter=int(arg);
	 	elif opt=='-i': seed=int(arg);
		elif opt=='-m': max_n=int(arg);
		elif opt=='-J': J0=float(arg);
		elif opt=='-j': J2=float(arg)/float(n);
		elif opt=='-b': bias=float(arg);
		elif opt=='-B': beta=float(arg);
		elif opt=='-e': dynamic_type=int(arg);
	
	p=float(k)/float(n);
	if(svd_routine==1): svd_parameter=float(10.**(-svd_parameter));
	print 'svd_parameter', svd_parameter
	# GENERATE GRAPH 			
	H=nx.random_regular_graph(k, n, seed); g_name='reg';
	

	out_dir='../data/';
	
	if(dynamic_type==1):
		outfile = open(out_dir+g_name+str(n)+'k'+str(k)+'s'+str(svd_routine)+str(svd_parameter)+'e'+str(dynamic_type)+'b'+str(bias)+'B'+str(beta)+'dyn.dat', 'w')
		out_svd=open(out_dir+g_name+str(n)+'k'+str(k)+'s'+str(svd_routine)+str(svd_parameter)+'e'+str(dynamic_type)+'b'+str(bias)+'B'+str(beta)+'svd.dat', 'w')
		histo_file=open(out_dir+g_name+str(n)+'k'+str(k)+'s'+str(svd_routine)+str(svd_parameter)+'e'+str(dynamic_type)+'b'+str(bias)+'B'+str(beta)+'histo.dat', 'w')
		out_parameters=open(out_dir+'parameters_'+g_name+str(n)+'k'+str(k)+'s'+str(svd_routine)+str(svd_parameter)+'e'+str(dynamic_type)+'b'+str(bias)+'B'+str(beta)+'parameters.dat', 'w')
		
	elif(dynamic_type==0):
		outfile = open(out_dir+g_name+str(n)+'k'+str(k)+'s'+str(svd_routine)+str(svd_parameter)+'e'+str(dynamic_type)+'b'+str(bias)+'dyn.dat', 'w')
		out_parameters=open(out_dir+'parameters_'+g_name+str(n)+'k'+str(k)+'s'+str(svd_routine)+str(svd_parameter)+'e'+str(dynamic_type)+'b'+str(bias)+'parameters.dat', 'w')
		out_svd=open(out_dir+g_name+str(n)+'k'+str(k)+'s'+str(svd_routine)+str(svd_parameter)+'e'+str(dynamic_type)+'b'+str(bias)+'svd.dat', 'w')
	
	
	print H,n,k, g_name
	if(d_component==1):
		print 'Connected components',nx.number_connected_components(H);
		H=sorted(list(nx.connected_component_subgraphs(H)), key = len, reverse=True)[0];  # Giant component is the first element of the list
		H=nx.convert_node_labels_to_integers(H);

	G=nx.DiGraph(H) 
	if(dynamic_type==1):sumJ=tl_dyn.assign_J(J0,J2,G,histo_file);  # Fix disorder for Glauber dynamics

	tl_main.output_parameters(out_parameters,G,d_component,n,k,T_max,svd_routine,svd_parameter,seed,max_n,bias,dynamic_type,beta,J0,J2); out_parameters.close();

	degree_sequence=sorted(nx.degree(G).values(),reverse=True) # degree sequence
	dmax=max(degree_sequence)/2 if(nx.is_directed(G)) else max(degree_sequence);
	print 'k_max=', dmax
	print 'J2,J0 ' , J2,J0

	# Initial Probability distribution  -------
	P=np.array([ [0.5-bias,0.5+bias]  for u in G.nodes() ]); 

	# INITIALIZE observables ---------------------------
	m=[0. for t in range(T_max)]; # magnetization
	q=[0. for t in range(T_max)]; # EA parameter (overlap)
	Cor=[0. for t in range(T_max)]; # spin-spin correlation
	Z=G.number_of_nodes();

	if(dynamic_type==0):
		for u in G.nodes():
			G.node[u]['w']=tl_dyn.w_majority(u,d,G);
			m[0]+=(P[u][1]-P[u][0]);Z+=1;
			q[0]+=(P[u][1]-P[u][0])*(P[u][1]-P[u][0]);
		m[0]/=float(Z);q[0]/=float(Z);
	elif(dynamic_type==1):
		for u in G.nodes():
			J=[ G[u][v]['J'] for v in G.neighbors(u) ];
			G.node[u]['w']=tl_dyn.w_glauber(u,d,G,beta,J);
			m[0]+=(P[u][1]-P[u][0]);Z+=1;
			q[0]+=(P[u][1]-P[u][0])*(P[u][1]-P[u][0]);
		m[0]/=float(Z);q[0]/=float(Z);Z=0.;	
	
	Cor[0]=0.;  #     <==  We start from a factorized initial condition		
	
	# INITIALIZE A's, M's, C's
	for u,v in G.edges():
		M=[random.randrange(Mmin,Mmax) for t in range(T+2)]          
	# messages i --> j
		M[0]=1;M[T]=1;
	# If T=0 just need to initialize A(0) and A(1)
		G[u][v]['As']=[np.random.rand(d,M[0],1)];
		G[u][v]['As'].append(np.random.rand(d,1,1,M[T]));

		G[u][v]['OldA']=[np.random.rand(d,M[0],1)];

		G[u][v]['As'][0][0,:,:]=1.  # P[v][0];
		G[u][v]['As'][0][1,:,:]=1.  # P[v][1];
		G[u][v]['As'][1][0,:,:,:]=P[u][0];
		G[u][v]['As'][1][1,:,:,:]=P[u][1];

		G[u][v]['OldA']=G[u][v]['As'][0];

	for u in G.nodes():	G.node[u]['marginal']=P[u];

	sv_ratio=1.;norm_ratio=0.;
	t0 = time.time();t1=t0;
	for t in range(T,T_max):
			
		print >> out_svd, t, tl_obs.max_dimM(G),sv_ratio,norm_ratio,str(t1-t0);out_svd.flush();

		if(t>0):	
			out_mag = open(out_dir+g_name+str(n)+'k'+str(k)+'s'+str(svd_routine)+str(svd_parameter)+'t'+str(t)+'b'+str(bias)+'B'+str(beta)+'mag.dat', 'w')
			out_cor = open(out_dir+g_name+str(n)+'k'+str(k)+'s'+str(svd_routine)+str(svd_parameter)+'t'+str(t)+'b'+str(bias)+'B'+str(beta)+'corr.dat', 'w')
			Cor[t]=tl_obs.calulate_marginals(G,d,out_mag,out_cor);	
			out_mag.close();out_cor.close();

		t1 = time.time()	
		m[t],q[t]=tl_obs.calulate_observable(G,d);

		sv_ratio,norm_ratio=otu.update(dynamic_type,beta,G,d,t,P,svd_routine=svd_routine,svd_threshold=svd_parameter,max_n=max_n);
		
		t2 = time.time()
		time_diff = round(t2 - t1)
		
		print >> outfile, t,m[t], q[t], Cor[t], str(t1-t0); outfile.flush();
		print 't = ', t, ' calculated in ',time_diff, 's';
		print ' <m>= ',m[t], ' q=', q[t], 'Cor=', Cor[t], 
		print 'maxMdim=',tl_obs.max_dimM(G),' sv_ratio=',sv_ratio,' norm_ratio=',norm_ratio;

	#  END CYCLE OVER t	
	time_diff = round(time.time() - t0)
	minute = time_diff / 60
	seconds = time_diff % 60  # Same as time_diff - (minutes * 60)
	print 'Total time=', minute, 'm and', seconds, 's'

	outfile.close();
	out_svd.close();
Exemple #47
def analysis(graph, prefix, flag):
    degree_centrality = sorted(nx.degree_centrality(graph).items(),
                               key=lambda x: x[1],
                               reverse=True)
    betweenness_centrality = sorted(nx.betweenness_centrality(graph).items(),
                                    key=lambda x: x[1],
                                    reverse=True)
    closeness_centrality = sorted(nx.closeness_centrality(graph).items(),
                                  key=lambda x: x[1],
                                  reverse=True)
    eigenvector_centrality = sorted(
        nx.eigenvector_centrality_numpy(graph).items(),
        key=lambda x: x[1],
        reverse=True)
    clustering = [(k, v) for k, v in nx.clustering(graph).items()]
    average_degree_connectivity = sorted([
        (k, v) for k, v in nx.average_degree_connectivity(graph).items()
    ])
    average_neighbor_degree = sorted(nx.average_neighbor_degree(graph).items(),
                                     key=lambda x: x[1],
                                     reverse=True)

    print(prefix + " connected components = " +
          str(nx.number_connected_components(graph)))
    print(prefix + " degree assortativity coefficient = " +
          str(nx.degree_assortativity_coefficient(graph)))
    print(prefix + " density = " + str(nx.density(graph)))

    cnt = 0
    for c in nx.connected_components(graph):
        subG = nx.subgraph(graph, c)
        print(type(subG))
        print(c)
        eccentricity = sorted(nx.eccentricity(subG).items(),
                              key=lambda x: x[1],
                              reverse=True)
        create_excel(subG, eccentricity, 'Ime', 'Ekscentricnost',
                     prefix + '_comp' + str(cnt) + '_eccentricity.xlsx', flag)
        print(prefix + " comp" + str(cnt) + " diameter = " +
              str(nx.diameter(subG)))
        print(prefix + " comp" + str(cnt) + " radius = " +
              str(nx.radius(subG)))
        print(prefix + " comp" + str(cnt) + " center = " +
              str(nx.center(subG)))
        print(prefix + " comp" + str(cnt) +
              " average shortest path length = " +
              str(nx.average_shortest_path_length(subG)))
        cnt += 1

    create_excel(graph, degree_centrality, 'Ime', 'Centralnost po stepenu',
                 prefix + '_degree_centrality.xlsx', flag)
    create_excel(graph, betweenness_centrality, 'Ime', 'Relaciona Centralnost',
                 prefix + '_betweenness_centrality.xlsx', flag)
    create_excel(graph, closeness_centrality, 'Ime',
                 'Centralnost po bliskosti',
                 prefix + '_closeness_centrality.xlsx', flag)
    create_excel(graph, eigenvector_centrality, 'Ime',
                 'Eigenvector centralnost',
                 prefix + '_eigenvector_centrality.xlsx', flag)
    create_excel(graph, clustering, 'Ime', 'Faktor klasterizacije',
                 prefix + '_clustering.xlsx', flag)
    create_excel(graph, average_degree_connectivity, 'Stepen',
                 'Prosecan stepen suseda',
                 prefix + '_average_degree_connectivity.xlsx', False)
    create_excel(graph, average_neighbor_degree, 'Ime', 'Stepen suseda',
                 prefix + '_average_neighbor_degree.xlsx', flag)
    D = {}
    S = [len(S_init)]
    I = [len(I_init)]
    R = [len(R_init)]
    a = np.zeros(20)
    b = np.zeros(20)
    c = np.zeros(20)
    e = np.zeros(20)
    Ro = np.zeros(20)
    S_last_20 = np.zeros(20)
    I_last_20 = np.zeros(20)
    R_last_20 = np.zeros(20)
    G = graph_generate(p)
    count = count + 1
    for t in range(len(time)):
        dc = nx.number_connected_components(G)
        if (dc > 1):
            print "break"
            break
        delta_I = 0
        delta_R = 0
        for i in range(len(G.nodes())):
            if (list(G.nodes())[i] in S_init):
                D[list(G.nodes())[i]] = 0
            if (list(G.nodes())[i] in I_init):
                D[list(G.nodes())[i]] = 1
            if (list(G.nodes())[i] in R_init):
                D[list(G.nodes())[i]] = 2

        Suscept_count = len(S_init)
        Infect_count = len(I_init)
    def test_number_weakly_connected_components(self):
        for G, C in self.gc:
            U = G.to_undirected()
            w = nx.number_weakly_connected_components(G)
            c = nx.number_connected_components(U)
            assert_equal(w, c)
def scc_nh_subgraph(m, n):
    return nx.number_connected_components(
        G.subgraph(list(set(G.neighbors(m)).union(set(G.neighbors(n))))))
Exemple #51
def Mccabe_Complexity(graph):
    return graph.number_of_edges() - graph.number_of_nodes() + 2 * nx.number_connected_components(graph)
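
# --- Added note (not part of the original): Mccabe_Complexity above is the
# standard cyclomatic complexity V(G) = E - N + 2P (E edges, N nodes, P
# connected components). A quick check on a toy control-flow graph:
import networkx as nx

cfg = nx.Graph([(1, 2), (2, 3), (3, 4), (2, 4)])  # one branch that rejoins
print(Mccabe_Complexity(cfg))  # 4 - 4 + 2*1 = 2 (one decision point)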
def calculate_modularity(part_G, orig_G, m):
    # print("m",m)
    E = part_G.number_of_edges()
    # print("E",E)
    nodes = part_G.nodes()
    s = 0
    for i in nodes:
        element = orig_G.degree(i)
        s += element
        # print("element",element)
    result = E / m - (s / (2 * m))**2
    # print("result",result)
    return result
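
# --- Added note (not part of the original): calculate_modularity computes, for
# a single community C, the standard modularity term
#     Q_C = |E_C| / m - (sum of degrees of C's nodes / (2 * m))**2,
# where m is the edge count of the full graph. The driver loop below sums this
# term over the connected components of the partially cut graph, so the printed
# "Modularity score" is the modularity of the current partition.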


for i in range(1, 6):
    print(i, " connected part")
    M = g.number_of_edges()
    parts = list(nx.connected_component_subgraphs(g))
    sum = 0
    for item in parts:
        print(item.nodes())
        sum += calculate_modularity(item, g, M)
    print("Modularity score", sum)
    print("edges removed:", startEdges - M)
    if i != 5:
        while (nx.number_connected_components(g) == i):
            b = approximate_calculate_edge_betweenness(g)
            maxedge = (max(b, key=b.get))
            g.remove_edge(*maxedge)
Exemple #53
	def discoverAssociationLocus(self, associationPeakGraph=None, min_overlap_ratio=0.1):
		"""
		2012.12.12 try to output the peaks that are associated with one locus. for each peak, output
				* result-id 
				* phenotype id
				* chromosome
				* start
				* stop
				* start_locus
				* stop_locus
				* no_of_loci
				* peak_locus
				* peak-score
		2012.11.20
		2012.6.24
		"""
		sys.stderr.write("Discovering association loci from graph of %s nodes. %s edges. %s connected components..."%\
						(associationPeakGraph.number_of_nodes(), associationPeakGraph.number_of_edges(), \
						nx.number_connected_components(associationPeakGraph) ))
		cc_graph_list = nx.connected_component_subgraphs(associationPeakGraph)
		counter = 0
		associationLocusList = []
		for cc_graph in cc_graph_list:
			#calculate connectivity of this component
			ne = cc_graph.number_of_edges()
			nn = cc_graph.number_of_nodes()
			if nn>1:
				connectivity = ne/float(nn*(nn-1)/2)
			else:
				connectivity = 1
			start_ls = []
			stop_ls = []
			association_peak_ls = []
			#get span of each node, then take median of all its start/stop
			result_id_set = set()
			chromosome_set = set()	#should be only one chromosome
			phenotype_id_set = set()
			for n in cc_graph:
				nodeObject = associationPeakGraph.node[n]
				chromosome_set.add(nodeObject['chromosome'])
				span = nodeObject['span']
				start_ls.append(span[0])
				stop_ls.append(span[1])
				association_peak_ls.extend(nodeObject['association_peak_ls'])
				result_id_set.add(nodeObject['result_id'])
				phenotype_id_set.add(nodeObject['phenotype_method_id'])
			if len(chromosome_set)>1:
				sys.stderr.write("Error: %s chromosomes (%s) in one connected component.\n"%(len(chromosome_set), repr(chromosome_set)))
				sys.exit(7)
			median_start = numpy.median(start_ls)
			median_stop = numpy.median(stop_ls)
			no_of_results = len(result_id_set)
			
			associationLocus = PassingDataList()
			#assign each value separately to impose the order of variables in associationLocus's internal list
			associationLocus.chromosome = chromosome_set.pop()
			associationLocus.start=median_start
			associationLocus.stop=median_stop
			associationLocus.no_of_peaks=nn
			associationLocus.connectivity=connectivity
			associationLocus.no_of_results=no_of_results
			associationLocus.association_peak_ls=association_peak_ls
			phenotype_id_ls = list(phenotype_id_set)
			phenotype_id_ls.sort()
			associationLocus.phenotype_id_ls_in_str = utils.getStrOutOfList(phenotype_id_ls) 
			#PassingDataList is sortable via (chromosome, start, stop ...)
			associationLocusList.append(associationLocus)
			counter += 1
		sys.stderr.write("%s association loci.\n"%(counter))
		return associationLocusList
Exemple #54
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import random

G = nx.read_edgelist("ca-GrQc.txt",
                     comments='#',
                     delimiter='\t',
                     nodetype=int,
                     create_using=nx.Graph())

############## Question 2
# Network Characteristics
print 'Number of nodes:', G.number_of_nodes()
print 'Number of edges:', G.number_of_edges()
print 'Number of connected components:', nx.number_connected_components(G)

# Connected components
GCC = list(nx.connected_component_subgraphs(G))[0]

# Fraction of nodes and edges in GCC
# (float() guards against Python 2 integer division, which would print 0)
print "Fraction of nodes in GCC: ", float(GCC.number_of_nodes()) / G.number_of_nodes()
print "Fraction of edges in GCC: ", float(GCC.number_of_edges()) / G.number_of_edges()

#%%
############## Question 3
# Degree
degree_sequence = G.degree().values()
print "Min degree ", np.min(degree_sequence)
print "Max degree ", np.max(degree_sequence)
print "Median degree ", np.median(degree_sequence)
Exemple #55
def main(adminIsPoint=False):

    ## Define filepath
    path = os.path.realpath(
        os.path.abspath(
            os.path.split(inspect.getfile(inspect.currentframe()))[0]))
    path = os.path.split(path)[0]

    ## Define dash. This .xlsm includes settings for the criticality script
    dash = os.path.join(path, r'dashboard.xlsm')
    ctrl = pd.read_excel(dash, sheetname="AGGREGATE", index_col=0)

    ## Define operative district. Note: this parameter can be anything; it names the sub-folder under the input and runtime directories from which files are drawn.
    district = ctrl['Weight'].loc['DISTRICT']

    # ensure folders exist
    runtime = os.path.join(path, r'PCS\Criticality\runtime\%s\\' % district)

    ## Add logging
    logging.basicConfig(filename=os.path.join(runtime, "PCS_Criticality.log"),
                        level=logging.INFO,
                        format="%(asctime)s-%(levelname)s: %(message)s")
    logging.info("Starting Criticality Process")
    print "Running: Criticality Analysis on %s. Do not interrupt" % district

    ## Path Settings
    # outputs
    outpath = os.path.join(path, 'Outputs', '%s' % district)

    for d in [outpath, runtime]:
        if not os.path.isdir(d):
            os.mkdir(d)

    ## Input file setting

    # location of OD
    OD_IN = os.path.join(path, 'PCS\Criticality\Input', '%s' % district)

    # location of administrative boundaries file
    DATA_IN = os.path.join(path, 'PCS\Criticality\Data_Layers')
    inAdmin = os.path.join(DATA_IN, 'Poverty_Communes_2009.shp')

    # road network import. Must be a .csv including geometry information of roads.
    inNetworkFile = os.path.join(OD_IN, 'Network.csv')

    # set WGS 84 coordinate reference system
    crs_in = {'init': 'epsg:4326'}

    # ensure folders exist
    for d in [outpath, runtime, OD_IN]:
        if not os.path.isdir(d):
            os.mkdir(d)

    # error checking - Check input data existence
    for curFile in [dash, inNetworkFile, inAdmin, DATA_IN, OD_IN]:
        if not os.path.exists(curFile):
            logging.error("No input found: %s" % curFile)
            raise ValueError("No input found: %s" % curFile)

    # import input dataframes - road network and control dashboard
    inNetwork = pd.read_csv(inNetworkFile)
    ctrldf = pd.read_excel(dash, sheetname="CRITICALITY", index_col='COL_ID')

    #Inputs

    # setting network shapefile location
    network = os.path.join(runtime, 'Network.shp')

    ## Network Preparation
    # set default iri value as the mean iri of roads for which iri exists.
    fillvalue = inNetwork['iri_med'].mean()

    # fill iri value where missing
    inNetwork['TC_iri_med'] = inNetwork['iri_med'].fillna(fillvalue)

    # set cost of traversing segment according to length and IRI, per settings in the excel dashboard
    inNetwork['total_cost'] = inNetwork['length'] * (
        ctrldf['Base_cost_km'][0] +
        (ctrldf['IRI_Coeff'][0] * inNetwork['TC_iri_med']))

    # convert the pandas DataFrame to a GeoDataFrame
    ginNetwork = gpd.GeoDataFrame(inNetwork,
                                  crs=crs_in,
                                  geometry=inNetwork['Line_Geometry'].map(
                                      shapely.wkt.loads))

    # set up Shapefile of road network

    ginNetwork.to_file(network, driver='ESRI Shapefile')
    logging.info("Successfully loaded data")

    # Generate admin boundary centroids
    if not adminIsPoint:
        prepareAdminCentroids(ginNetwork, inAdmin, crs_in,
                              os.path.join(OD_IN, 'adm_centroids.shp'))
        logging.info("Created admin centroids")

    # define function for loading origin files into a dictionary. Parameters controlled from the dashboard excel
    def makeOrigin(n, ctrldf):
        origindict = {
            'name':
            ctrldf['OName'][n],
            'file':
            os.path.join(path, 'PCS', 'Criticality', 'Input', district,
                         '%s.shp' % ctrldf['OName'][n]),
            'scalar_column':
            ctrldf['OScalar'][n]
        }
        return origindict

    # define function for loading destination files into a dictionary. Parameters controlled from the dashboard excel
    def makeDestination(n, ctrldf):
        destdict = {
            'name':
            ctrldf['DName'][n],
            'file':
            os.path.join(path, 'PCS', 'Criticality', 'Input', district,
                         '%s.shp' % ctrldf['DName'][n]),
            'penalty':
            ctrldf['DPenalty'][n],
            'importance':
            ctrldf['DImportance'][n],
            'annual':
            ctrldf['DAnnual'][n],
            'scalar_column':
            ctrldf['DScalar'][n]
        }
        return destdict

    # load origins and destinations into dictionary, create dictionaries of each set
    origin_1, origin_2, origin_3, origin_4, origin_5 = makeOrigin(
        0, ctrldf), makeOrigin(1, ctrldf), makeOrigin(2, ctrldf), makeOrigin(
            3, ctrldf), makeOrigin(4, ctrldf)
    originlist = {
        '%s' % ctrldf['OName'][0]: origin_1,
        '%s' % ctrldf['OName'][1]: origin_2,
        '%s' % ctrldf['OName'][2]: origin_3,
        '%s' % ctrldf['OName'][3]: origin_4,
        '%s' % ctrldf['OName'][4]: origin_5,
    }
    destination_1, destination_2, destination_3, destination_4, destination_5 = makeDestination(
        0, ctrldf), makeDestination(1, ctrldf), makeDestination(
            2, ctrldf), makeDestination(3, ctrldf), makeDestination(4, ctrldf)
    destinationlist = {
        '%s' % ctrldf['DName'][0]: destination_1,
        '%s' % ctrldf['DName'][1]: destination_2,
        '%s' % ctrldf['DName'][2]: destination_3,
        '%s' % ctrldf['DName'][3]: destination_4,
        '%s' % ctrldf['DName'][4]: destination_5,
    }
    logging.info("Opened origins and destinations")

    # Preparation of network via TU Delft code
    gdf_points, gdf_node_pos, gdf = net_p.prepare_centroids_network(
        origin_1['file'], network)

    gdf.to_csv(
        os.path.join(
            r'C:\Users\charl\Documents\GitHub\Criticality\PCS\Criticality\Runtime\[district_1]',
            'gdf.csv'))
    gdf_node_pos.to_csv(
        os.path.join(
            r'C:\Users\charl\Documents\GitHub\Criticality\PCS\Criticality\Runtime\[district_1]',
            'gdf_node_pos.csv'))

    # Create Networkx MultiGraph object from the GeoDataFrame
    G = net_p.gdf_to_simplified_multidigraph(gdf_node_pos, gdf, simplify=False)

    # Change the MultiGraph object to Graph object to reduce computation cost
    G_tograph = net_p.multigraph_to_graph(G)
    logging.info(
        'Loaded road network: number of disconnected components is: %d' %
        nx.number_connected_components(G_tograph))

    # Observe the properties of the Graph object
    nx.info(G_tograph)

    # Take only the largest subgraph with all connected links
    len_old = 0
    for g in nx.connected_component_subgraphs(G_tograph):
        if len(list(g.edges())) > len_old:
            G1 = g
            len_old = len(list(g.edges()))
    G_sub = G1.copy()

    nx.info(G_sub)

    # Save the simplified transport network into a GeoDataFrame
    gdf_sub = net_p.graph_to_df(G_sub)
    blank, gdf_node_pos2, gdf_new = net_p.prepare_newOD(
        origin_1['file'], gdf_sub)

    #Road Network Graph prep
    G2_multi = net_p.gdf_to_simplified_multidigraph(gdf_node_pos2,
                                                    gdf_new,
                                                    simplify=False)

    # Dump files to runtime if dump = 1
    Filedump(gdf_new, 'Road_Lines', runtime)
    Filedump(gdf_node_pos2, 'Road_Nodes', runtime)
    G2 = net_p.multigraph_to_graph(G2_multi)
    gdf2 = net_p.graph_to_df(G2)
    nLink = len(G2.edges())

    # open empty lists
    Outputs, cost_list, iso_list = [], [], []

    ## Run the calculateOD function for each combination of origins and destinations specified in the control excel
    # append all outputs to the Outputs, cost_list and iso_list objects just created
    for z in ctrldf.index:
        if (((ctrldf['ComboO'][z]) != 0) & ((ctrldf['ComboD'][z]) != 0) &
            (pd.notnull(ctrldf['ComboO'][z])) &
            (pd.notnull(ctrldf['ComboD'][z]))):  # the second notnull checked ComboO twice, presumably a typo
            Q = int(ctrldf['ComboNumber'][z])
            logging.info(
                'Computing | combination %s as origin and %s as destination ' %
                (ctrldf['ComboO'][z], ctrldf['ComboD'][z]))
            xx = calculateOD(originlist['%s' % ctrldf['ComboO'][z]],
                             destinationlist['%s' % ctrldf['ComboD'][z]], Q,
                             gdf_sub, G2, nLink, gdf2, runtime, ctrldf)
            Outputs.append(xx)
            cost_list.append("Social_Cost_%s" % Q)
            iso_list.append("Isolated_Trips_%s" % Q)

    # drop unnecessary columns
    Output = inNetwork.drop(["geometry", 'TC_iri_med', 'total_cost'], axis=1)

    # for each object in the Outputs list:
    for o_d_calc in range(0, len(Outputs)):

        # Merge the objects together. This creates multiple columns showing each scenario
        Output = Output.merge(Outputs[o_d_calc]['summary'],
                              how='left',
                              on='ID')

    # sum across the relevant columns - the 'Social_Cost' columns generated above in calculateOD for each O-D file combo
    Output['Cost_total'] = Output[cost_list].sum(axis=1)

    # sum across the relevant columns - the 'Isolated_Trips' columns generated above in calculateOD for each O-D file combo
    Output['Iso_total'] = Output[iso_list].sum(axis=1)

    # Generate an overall criticality score for each road based on user input weights between isolated trips and disrupted trips
    Output['CRIT_SCORE'] = (
        ctrldf['Disrupt_Weight'][0] * Output['Cost_total'] +
        ctrldf['Isolate_Weight'][0] * Output['Iso_total'])

    # normalize for each road
    Output['CRIT_SCORE'] = (
        (Output['CRIT_SCORE'] - Output['CRIT_SCORE'].min()) /
        (Output['CRIT_SCORE'].max() - Output['CRIT_SCORE'].min()))
    logging.info("Calculated PCS Criticality")
    FileOut(Output, 'criticality_output', outpath)
Exemple #56
def components(bot, update):
    print('received components')
    global global_graph
    bot.send_message(chat_id=update.message.chat_id,
                     text=str(nx.number_connected_components(global_graph)))
Exemple #57
def get_stats(G, output_path=None, all_stats=False):
    """
    Prints or stores some basic statistics about the graph. If an output path is provided the results are written in
    said file.

    Parameters
    ----------
    G : graph
        A NetworkX graph or digraph.
    output_path : file or string, optional
        File or filename to write. Default is None
    all_stats : bool, optional
        Sets if all stats or a small subset of them should be shown. Computing all stats can be very slow.
        Default is False.
    """
    # Compute the number of nodes and edges of the graph
    N = len(G.nodes)
    M = len(G.edges)

    # Compute average degree and deg1 and deg2 num nodes
    degs = np.array(G.degree)[:, 1]
    avgdeg = sum(degs) / N
    counts = collections.Counter(degs)
    degdict = collections.OrderedDict(sorted(counts.items()))
    deg1 = degdict.get(1, 0)
    deg2 = degdict.get(2, 0)

    if all_stats:
        x = np.log(np.array(list(degdict.keys())))  # degrees (list() needed for Python 3 dict views)
        y = np.log(np.array(list(degdict.values())))  # frequencies
        # the power-law coef. is the slope of a linear model fitted to the loglog data, which has a closed-form solution
        plawcoef = np.abs(np.cov(x, y) / np.var(x))[0, 1]
        cc = nx.average_clustering(G)
        dens = nx.density(G)
        if G.is_directed():
            diam = nx.diameter(G) if nx.is_strongly_connected(G) else float(
                'inf')
        else:
            diam = nx.diameter(G)

    # Print or write to file the graph info
    if output_path is None:
        # Print some basic info about the graph
        if G.is_directed():
            num_ccs = nx.number_weakly_connected_components(G)
            Gcc = max(nx.weakly_connected_component_subgraphs(G), key=len)
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            print("Directed graph")
            print("Num. nodes: {}".format(N))
            print("Num. edges: {}".format(M))
            print("Num. weakly connected components: {}".format(num_ccs))
            print("Num. nodes in largest weakly CC: {} ({} % of total)".format(
                Ncc, Ncc * 100.0 / N))
            print("Num. edges in largest weakly CC: {} ({} % of total)".format(
                Mcc, Mcc * 100.0 / M))
        else:
            num_ccs = nx.number_connected_components(G)
            Gcc = max(nx.connected_component_subgraphs(G), key=len)
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            print("Undirected graph")
            print("Num. nodes: {}".format(N))
            print("Num. edges: {}".format(M))
            print("Num. connected components: {}".format(num_ccs))
            print("Num. nodes in largest weakly CC: {} ({} % of total)".format(
                Ncc, Ncc * 100.0 / N))
            print("Num. edges in largest weakly CC: {} ({} % of total)".format(
                Mcc, Mcc * 100.0 / M))
        if all_stats:
            print("Clustering coefficient: {}".format(cc))
            print("Diameter: {}".format(diam))
            print("Density: {}".format(dens))
            print("Power-law coefficient: {}".format(plawcoef))
        print("Avg. node degree: {}".format(avgdeg))
        print("Num. degree 1 nodes: {}".format(deg1))
        print("Num. degree 2 nodes: {}".format(deg2))
        print("Num. self loops: {}".format(G.number_of_selfloops()))
        print("")
    else:
        # Write the info to the provided file
        f = open(output_path, 'w+b')
        if G.is_directed():
            num_ccs = nx.number_weakly_connected_components(G)
            Gcc = max(nx.weakly_connected_component_subgraphs(G), key=len)
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            f.write("# Directed graph".encode())
            f.write("\n# Num. nodes: {}".format(N).encode())
            f.write("\n# Num. edges: {}".format(M).encode())
            f.write("\n# Num. weakly connected components: {}".format(
                num_ccs).encode())
            f.write("\n# Num. nodes in largest weakly CC: {} ({} % of total)".
                    format(Ncc, Ncc * 100.0 / N).encode())
            f.write("\n# Num. edges in largest weakly CC: {} ({} % of total)".
                    format(Mcc, Mcc * 100.0 / M).encode())
        else:
            num_ccs = nx.number_connected_components(G)
            Gcc = max(nx.connected_component_subgraphs(G), key=len)
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            f.write("# Undirected graph".encode())
            f.write("\n# Num. nodes: {}".format(N).encode())
            f.write("\n# Num. edges: {}".format(M).encode())
            f.write(
                "\n# Num. connected components: {}".format(num_ccs).encode())
            f.write("\n# Num. nodes in largest CC: {} ({} % of total)".format(
                Ncc, Ncc * 100.0 / N).encode())
            f.write("\n# Num. edges in largest CC: {} ({} % of total)".format(
                Mcc, Mcc * 100.0 / M).encode())
        if all_stats:
            f.write("\n# Clustering coefficient: {}".format(cc).encode())
            f.write("\n# Diameter: {}".format(diam).encode())
            f.write("\n# Density: {}".format(dens).encode())
            f.write("\n# Power-law coefficient: {}".format(plawcoef).encode())
        f.write("\n# Avg. node degree: {}".format(avgdeg).encode())
        f.write("\n# Num. degree 1 nodes: {}".format(deg1).encode())
        f.write("\n# Num. degree 2 nodes: {}".format(deg2).encode())
        f.write("\n# Num. self loops: {}".format(
            G.number_of_selfloops()).encode())
        f.write("\n".encode())
        f.close()
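
# --- Added usage sketch (not part of the original): with networkx, numpy and
# collections imported as the function assumes, get_stats runs directly on a
# small built-in graph.
import networkx as nx

get_stats(nx.karate_club_graph())               # print stats to stdout
get_stats(nx.karate_club_graph(), 'stats.txt')  # or write them to a file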
Exemple #58
def double_GLF_heuristic(radii,
                         simplex_measure,
                         tri,
                         d=2,
                         a_mid=.5,
                         heuristic='minima',
                         opt_method='ampgo',
                         eval_parameters=['geom', 30000],
                         debug=False,
                         opt_kws=dict()):
    """

    Parameters
    ----------
    radii : array of shape [n_centers, n_features]
    
    simplex_measure : array of shape [n_gaussian_samples, n_features]
        Generated by make_multivariate_gaussians

    opt_method : string, compatible with lmfit
        Determines optimization routine for regression, recommend global optimization
        such as ampgo or basinhopping
    
    heuristic : string

    eval_parameters : list of length 2
    Returns
    -------

    """
    orig_x = np.array(radii[np.argsort(radii)])
    fit_x = orig_x
    n_inflection = 0

    fit_y = np.cumsum(
        simplex_measure[np.argsort(radii)]) / np.sum(simplex_measure)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        d_glf_model = Model(double_glf)
        d_glf_model.set_param_hint('a1', value=0, min=0, max=1, vary=False)
        d_glf_model.set_param_hint('adiff1', value=a_mid, min=0.0005, max=1)
        d_glf_model.set_param_hint('a2', value=.5, min=0, expr='adiff1 + a1')
        d_glf_model.set_param_hint('a3', value=1, min=0, vary=False)
        d_glf_model.set_param_hint('b1', value=1, min=0, max=1000)
        d_glf_model.set_param_hint('b2', value=1, min=0, max=1000)
        d_glf_model.set_param_hint('c1', value=1, min=0.00001, vary=False)
        d_glf_model.set_param_hint('c2', value=1, min=0.00001, vary=False)
        d_glf_model.set_param_hint('q1', value=1, min=0.00001, max=1)
        d_glf_model.set_param_hint('q2', value=1, min=0.00001, max=1)
        d_glf_model.set_param_hint('v1', value=1, min=0.00001, max=100)
        d_glf_model.set_param_hint('v2', value=1, min=0.00001, max=100)

        d_glf_result = d_glf_model.fit(fit_y,
                                       x=fit_x,
                                       method=opt_method,
                                       nan_policy='propagate',
                                       fit_kws=opt_kws)

    # Use yeval to calculate r2
    y_pred = double_glf(fit_x, d_glf_result.params['a1'].value, d_glf_result.params['a2'].value, \
                        d_glf_result.params['a3'].value, d_glf_result.params['c1'].value, \
                        d_glf_result.params['c2'].value, d_glf_result.params['b1'].value, \
                        d_glf_result.params['b2'].value, d_glf_result.params['q1'].value, \
                        d_glf_result.params['q2'].value, d_glf_result.params['v1'].value, \
                        d_glf_result.params['v2'].value)
    R2 = 1 - np.sum((fit_y - y_pred)**2) / np.sum((fit_y - np.mean(fit_y))**2)

    if eval_parameters[0] == 'geom':
        x_eval = np.geomspace(np.min(radii),
                              np.max(radii),
                              num=eval_parameters[1])
    elif eval_parameters[0] == 'linear':
        x_eval = np.linspace(np.min(radii),
                             np.max(radii),
                             num=eval_parameters[1])

    # Determine slope and concavity for use in determining the optimal alpha value

    y_eval = double_glf(x_eval, d_glf_result.params['a1'].value, d_glf_result.params['a2'].value, \
                        d_glf_result.params['a3'].value, d_glf_result.params['c1'].value, \
                        d_glf_result.params['c2'].value, d_glf_result.params['b1'].value, \
                        d_glf_result.params['b2'].value, d_glf_result.params['q1'].value, \
                        d_glf_result.params['q2'].value, d_glf_result.params['v1'].value, \
                        d_glf_result.params['v2'].value)

    y_slope = double_glf_dx(x_eval, d_glf_result.params['a1'].value, d_glf_result.params['a2'].value, \
                        d_glf_result.params['a3'].value, d_glf_result.params['c1'].value, \
                        d_glf_result.params['c2'].value, d_glf_result.params['b1'].value, \
                        d_glf_result.params['b2'].value, d_glf_result.params['q1'].value, \
                        d_glf_result.params['q2'].value, d_glf_result.params['v1'].value, \
                        d_glf_result.params['v2'].value)

    y_con = double_glf_dx2(x_eval, d_glf_result.params['a1'].value, d_glf_result.params['a2'].value, \
                           d_glf_result.params['a3'].value, d_glf_result.params['c1'].value, \
                           d_glf_result.params['c2'].value, d_glf_result.params['b1'].value, \
                           d_glf_result.params['b2'].value, d_glf_result.params['q1'].value, \
                           d_glf_result.params['q2'].value, d_glf_result.params['v1'].value, \
                           d_glf_result.params['v2'].value)

    peaks, __ = find_peaks(y_slope, width=2, height=.005)
    maxima, minima = find_extrema(y_slope, threshold=0)
    inflection, __ = find_peaks(np.abs(np.gradient(np.sign(y_con))))

    single_1 = glf(x_eval, d_glf_result.params['a1'].value, d_glf_result.params['a2'].value, \
               d_glf_result.params['c1'].value, d_glf_result.params['b1'].value, \
               d_glf_result.params['q1'].value, d_glf_result.params['v1'].value)

    single_2 = glf(x_eval, 0, d_glf_result.params['a3'].value - d_glf_result.params['a2'].value, \
               d_glf_result.params['c2'].value, d_glf_result.params['b2'].value, \
               d_glf_result.params['q2'].value, d_glf_result.params['v2'].value)

    plt.plot(x_eval, single_1, linestyle='--')
    plt.plot(x_eval, single_2, linestyle='-.')
    plt.ylabel('% Convex Volume')
    plt.xlabel('Alpha')
    plt.xscale('log')
    plt.legend(['GLF 1', 'GLF 2'])
    if debug == True:
        plt.show()
    else:
        plt.close()

    plt.figure()
    plt.plot(fit_x, fit_y, linestyle='none', marker='.')
    plt.plot(x_eval, y_eval, linestyle='-.')
    plt.ylabel('% Convex Volume')
    plt.xlabel('Alpha')
    plt.xscale('log')

    #d_glf_result.plot()
    plt.title(opt_method)
    plt.axhline(d_glf_result.params['a1'].value)
    plt.axhline(d_glf_result.params['a3'].value -
                d_glf_result.params['a2'].value)
    plt.axhline(d_glf_result.params['a3'].value)
    plt.xscale('log')

    for i in peaks:
        plt.axvline(x_eval[i], color='r', linestyle='--')

    for i in minima:
        for k in range(len(peaks) - 1):
            if i > peaks[k] and i < peaks[k + 1]:
                plt.axvline(x_eval[i], color='r', linestyle='--')

    for i in inflection:
        plt.axvline(x_eval[i], color='k', linestyle='--')

    if debug == True:
        plt.show()
    else:
        plt.close()

    fig, axs = plt.subplots(3, 1, sharex=True)
    axs[0].plot(fit_x, fit_y)
    axs[0].plot(x_eval, y_eval)
    axs[1].plot(x_eval, y_slope)
    axs[2].plot(x_eval, y_con)
    axs[0].set_xlabel('Alpha')
    axs[0].set_ylabel('Volume')  # was axs[1], which the next line immediately overwrote
    axs[1].set_ylabel('First Derivative')
    axs[1].set_ylim(0, 1.5 * np.min(y_slope[peaks]))
    axs[2].set_ylabel('Second Derivative')
    axs[0].set_xscale('log')
    axs[1].set_xscale('log')
    axs[2].set_xscale('log')

    for i in peaks:
        axs[0].axvline(x_eval[i], color='r', linestyle='--')
        axs[1].axvline(x_eval[i], color='r', linestyle='--')

    for i in minima:
        for k in range(len(peaks) - 1):
            if i > peaks[k] and i < peaks[k + 1]:
                axs[0].axvline(x_eval[i], color='r', linestyle='--')
                axs[1].axvline(x_eval[i], color='r', linestyle='--')

    for i in inflection:
        axs[1].axvline(x_eval[i], color='k', linestyle='--')
        axs[0].axvline(x_eval[i], color='k', linestyle='--')
    if debug == True:
        plt.show()
    else:
        plt.close()

    optimal_alpha = x_eval[-1] + 1
    if len(peaks) == 1:
        message = 'Convex: Only one region of maximal slope'
        if debug:
            print(message)
        flag = False
        optimal_alpha = x_eval[-1] + 1

    else:
        if heuristic == 'minima':
            n_inflection = 2
            for i in minima:
                for k in range(len(peaks) - 1):
                    if i > peaks[k] and i < peaks[k + 1]:
                        #print(x_eval[i])
                        alpha_boolean = alpha_hull.alpha_shape(
                            tri, radii, x_eval[i])
                        edges, vertices = alpha_hull.collect_alpha_edges(
                            tri, alpha_boolean, d=d)
                        G_alpha = network_from_collection(edges, vertices)
                        n_components = nx.number_connected_components(G_alpha)

                        if n_components > 1:
                            message = 'Convex: Slope minima is disjoint'
                            if debug:
                                print(message)
                            flag = False
                            optimal_alpha = x_eval[-1] + 1

                        elif len(vertices) != len(tri.points):
                            message = 'Convex: Slope minima only contains ' + str(
                                100 * len(vertices) /
                                len(tri.points)) + ' percent of samples'
                            if debug:
                                print(message)
                            flag = False
                            optimal_alpha = x_eval[-1] + 1

                        else:
                            message = 'Concave: Slope minima contains all samples'
                            if debug:
                                print(message)
                            flag = True
                            optimal_alpha = x_eval[i]

        elif heuristic == 'inflection':
            counter = 1
            for i in inflection[1::]:
                #print(x_eval[i])
                counter += 1
                alpha_boolean = alpha_hull.alpha_shape(tri, radii, x_eval[i])
                edges, vertices = alpha_hull.collect_alpha_edges(tri,
                                                                 alpha_boolean,
                                                                 d=d)
                G_alpha = network_from_collection(edges, vertices)
                n_components = nx.number_connected_components(G_alpha)

                if counter >= 4:
                    message = 'Convex: An inflection point exceeding the third contains all the points and is most likely noise.'
                    if debug:
                        print(message)
                    flag = False
                    optimal_alpha = x_eval[-1] + 1
                    break

                elif n_components > 1:
                    message = 'Convex: The inflection point, (' + str(
                        counter) + '), at ' + str(i) + ' is disjoint'
                    if debug:
                        print(message)
                    flag = False
                    optimal_alpha = x_eval[-1] + 1

                elif len(vertices) == len(tri.points):
                    message = 'Concave: The inflection point, (' + str(
                        counter) + '), at ' + str(i) + ' contains ' + str(
                            100 * len(vertices) /
                            len(tri.points)) + ' percent of samples'
                    flag = True
                    if debug:
                        print(message)
                    optimal_alpha = x_eval[i]
                    n_inflection = counter
                    break

                elif counter == len(inflection):
                    message = 'Convex: None of the inflection points contained all samples'
                    flag = False
                    if debug:
                        print(message)
                    optimal_alpha = x_eval[-1] + 1

    return optimal_alpha, d_glf_result, R2, n_inflection, flag, message
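
# --- Added sketch (not part of the original): glf and double_glf are imported
# from elsewhere. Judging by the call signatures above, glf is presumably the
# generalized logistic (Richards) curve; a minimal sketch under that assumption:
import numpy as np

def glf(x, a, k, c, b, q, v):
    # a: lower asymptote, k: upper asymptote, b: growth rate,
    # c, q, v: shape parameters of the Richards curve
    return a + (k - a) / np.power(c + q * np.exp(-b * x), 1.0 / v)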
Exemple #59
def betweeness(paths, u, v):
    # Reconstructed opening (the def line and the surrounding setup were lost in
    # extraction): the signature is inferred from the call
    # betweeness(list(P), i[0], i[1]) in the main loop below; the graph G and the
    # dict Edges (one zero-initialised entry per edge) are assumed to be built in
    # the lost lines.
    count = 0
    for i in paths:
        for j in range(len(list(i)) - 1):
            if u == i[j] and v == i[j + 1]:
                count += 1
    for i in G.edges():
        if (u == i[0] and v == i[1]) or (u == i[1] and v == i[0]):
            Edges[i] += count
    return len(paths)


#Main function
for i in G.edges():
    count = 0
    for u in G.nodes():
        for v in G.nodes():
            if u != v:
                P = nx.all_shortest_paths(G, u, v)
                count += betweeness(list(P), i[0], i[1])
    Edges[i] = float(Edges[i]) / count

plt.figure("Communities")

print("Betweeness                  Removed Edges")
for i in reversed(sorted((v, k) for (k, v) in Edges.items())):
    print(str(i[0]) + "		" + str(i[1]))
    G.remove_edge(i[1][0], i[1][1])
    if nx.number_connected_components(G) == N:
        break

nx.draw(G)
plt.show()
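
# --- Added note (not part of the original): the hand-rolled betweeness() above
# re-enumerates all shortest paths once per edge; networkx computes the same
# underlying quantity (up to normalisation) in a single pass:
import networkx as nx

eb = nx.edge_betweenness_centrality(G, normalized=False)  # G as built above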
Exemple #60
def gnewman(club,splitTo = 2):
    iteration = 0
    # Why check the number of connected components? For an undirected graph, a
    # connected component is a subgraph in which any two vertices are connected
    # to each other by paths. That is the right stopping test here, since we are
    # splitting the graph into subgraphs, i.e. mathematically representing the
    # splitting of the club.
    while nx.number_connected_components(club) < splitTo:
        # returns to us edges with the weights
        between = nx.edge_betweenness_centrality(club,normalized=False)
        # we want the edges with the highest edge betweenness centrality
        # there might be ties so just get the max betweenness
        m = max(between.values())
        # unpack the tuple returned to us by between.items ((u,v), maxBetweenScore)
        for (hU,hV),val in between.items():
            # check to see if m(max betweenness score) is equal to val
            # removes ties along the way
            if val == m:
                club.remove_edge(hU,hV)
                print("removed edge %s--%s with betweenness score of %f"%(hU,hV,m))
        iteration += 1

        print("-------------------------")
        # this print out can be uncommented it simply shows the same metric as described two different ways
        # print(nx.number_connected_components(club),len(list(nx.connected_component_subgraphs(club))))
    print("total iterations %d for splitting into %d"%(itteration,splitTo))