def communities(self, nCommunities, weight=None):
    """
    Compute communities.

    Parameters
    ----------
    nCommunities - number of communities to be returned.
        This is added to simplify the process; the original GN algorithm
        does not need a pre-specified number of communities.
        Other measures, like a threshold on betweenness centrality,
        can be used instead.
    weight (string) - If None, all edge weights are considered equal.
        Otherwise holds the name of the edge attribute used as weight.

    Returns
    -------
    A list of communities where each community is a list of the nodes
    in the community.
    """
    gr = self.g
    n = nx.number_connected_components(gr)
    components = nx.connected_components(gr)

    while n < nCommunities:
        gr = self.communitySplits(gr, weight=weight)
        components = nx.connected_components(gr)
        n = nx.number_connected_components(gr)
        if gr.number_of_edges() == 0:
            break
    # connected_components() yields node sets; materialize them so the
    # return value matches the documented list-of-lists
    return [list(c) for c in components]
def communitySplits(self, graph, weight=None):
    """
    Compute the splits for the formation of communities.

    Arguments:
        graph - A networkx graph or digraph.
        weight - If None, all edge weights are considered equal.
            Otherwise holds the name of the edge attribute used as weight.

    Returns:
        The graph with weak edges removed.
    """
    nConnComp = nx.number_connected_components(graph)
    nComm = nConnComp

    while nComm <= nConnComp:
        betweenness = nx.edge_betweenness_centrality(graph, weight=weight)
        if len(betweenness.values()) != 0:
            max_betweenness = max(betweenness.values())
        else:
            break
        # remove every edge whose centrality equals the maximum
        for u, v in list(betweenness.items()):
            if float(v) == max_betweenness:
                graph.remove_edge(u[0], u[1])
        nComm = nx.number_connected_components(graph)
    return graph
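# A hedged usage sketch for the two methods above. They are assumed to live on
# a small holder class whose `g` attribute is a networkx Graph; the class name
# `GirvanNewman` here is illustrative only, not from the original code.
import networkx as nx

class GirvanNewman(object):
    def __init__(self, g):
        self.g = g
    # communities() and communitySplits() from above would be pasted here.

# Expected use (commented out because the methods are defined above, not here):
# gn = GirvanNewman(nx.barbell_graph(5, 0))   # two cliques joined by one edge
# comms = gn.communities(2)
# print([sorted(c) for c in comms])           # expect the two cliques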
def detectBetweenness(G, numClusters, sites, bipartite):
    Gnew = copy.deepcopy(G)
    numComponents = nx.number_connected_components(G)

    betweenness = nx.edge_betweenness_centrality(Gnew, weight='capacity')
    pickle.dump(betweenness, open("betweennessUnipartite.p", "wb"))
    #betweenness = pickle.load("betweennessUnipartite.p", "rb")

    while numComponents < numClusters:
        # calculate betweenness of each edge
        betweenness = nx.edge_betweenness_centrality(Gnew, weight='capacity')

        # identify and remove the edge(s) with highest betweenness;
        # edges must come off Gnew (the working copy), not G, otherwise
        # the clusters computed from Gnew below never change
        max_ = max(betweenness.values())
        for k, v in betweenness.items():
            if float(v) == max_:
                Gnew.remove_edge(k[0], k[1])
        numComponents = nx.number_connected_components(Gnew)

    clusters = {}
    i = 0
    j = 0
    for component in list(nx.connected_components(Gnew)):
        for node in component:
            if node in sites:
                clusters[node] = i
                j += 1
        print j, "Nodes in cluster ", i
        j = 0
        i += 1

    return clusters
def deleteExtraEdges(cg, b, VERBOSE=False):
    ndist = {}
    numConnected = nx.number_connected_components(cg)
    if VERBOSE:
        print("number of nodes is ", cg.number_of_nodes())
    # take a snapshot of the neighbors: we add/remove edges while iterating
    for n in list(cg.neighbors(b)):
        # test whether deleting the edge between n and b increases
        # the number of connected components
        cg.remove_edge(b, n)
        newNumConnected = nx.number_connected_components(cg)
        if newNumConnected == numConnected:
            # then this could be a valid deletion
            # compute the step distance from n to its neighbor b
            if VERBOSE:
                print("the edge between %s and %s can be cut without changing the topology of the graph" % (b, n))
            ndist[(b, n)] = math.sqrt((n[0]-b[0])**2 + (n[1]-b[1])**2 + (n[2]-b[2])**2)
        cg.add_edge(b, n)
    if ndist:
        items = list(ndist.items())
        # rearrange node,distance pairing so we can sort on distance
        k, v = list(zip(*items))
        items = list(zip(v, k))
        maxNeighbor = max(items)
        # cut the maximum step length edge that is valid to cut
        if VERBOSE:
            print("removing edge", maxNeighbor[1][0], maxNeighbor[1][1])
        cg.remove_edge(maxNeighbor[1][0], maxNeighbor[1][1])
        # recurse with the same verbosity so progress keeps being reported
        cg = deleteExtraEdges(cg, b, VERBOSE)
    return cg
def general_fiedler(G, k, trials, plotname): '''Number of components when you apply the threshold cut on a random vector in the span of 1st k''' v = keigenvectors(G, k) print v flag = 1 x_data = [] y_data = [] for i in range(trials): z = randomvector(v) (y1, y2) = thresholdcut(z, 0) H1 = G.subgraph(y1) n1 = nx.number_connected_components(H1) H2 = G.subgraph(y2) n2 = nx.number_connected_components(H2) if n1 < n2: n = n1 else: n = n2 x_data.append(i) y_data.append(n) if n > k-1: flag = 0 print 'Number of components: ' + str(n) print 'z = ' + str(z) if flag: print 'Not found, number of components: ' + str(n) k_data = [k-1 for x in x_data] plt.plot(x_data, y_data, 'ro') plt.plot(x_data, k_data, linewidth=2) plt.axis([0, trials, 0, k+10]) plt.savefig(plotname)
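# keigenvectors(), randomvector() and thresholdcut() are helpers not shown in
# this file. A plausible stand-in for thresholdcut() (an assumption about its
# behaviour, not the original) splits node indices by comparing the vector
# entries to the threshold:
def thresholdcut(z, t):
    # indices with entry above t on one side, the rest on the other
    y1 = [i for i, zi in enumerate(z) if zi > t]
    y2 = [i for i, zi in enumerate(z) if zi <= t]
    return (y1, y2)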
def get_bridges(graph):
    # materialize the edge list first: we mutate the graph while iterating
    all_edges = list(graph.edges(keys=True, data=True))
    for e in all_edges:
        graph.remove_edge(*e[:-1])
        removed_comps = nx.number_connected_components(graph)
        # restore the edge with its original key and attributes
        graph.add_edge(*e[:-1], **e[-1])
        if nx.number_connected_components(graph) < removed_comps:
            yield e
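# Quick check of get_bridges() on a toy MultiGraph; the edge (2, 3) is the
# only bridge. This mirrors nx.bridges(), which recent networkx releases
# provide for simple Graphs but not for multigraphs.
import networkx as nx

g = nx.MultiGraph()
g.add_edges_from([(0, 1), (1, 2), (0, 2), (2, 3)])
print([(u, v) for u, v, k, d in get_bridges(g)])   # -> [(2, 3)]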
def get_number_of_components(filename): import networkx as nx threshold = 0 f = open(filename[:-4]+'_components.dat','w') for i in range(0,101): threshold = float(i)/100 G = get_threshold_matrix(filename, threshold) print 'number of connected components:', nx.number_connected_components(G) f.write("%f\t%d\n" % (threshold, nx.number_connected_components(G))) f.close()
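# get_threshold_matrix() is defined elsewhere in that project; a minimal
# stand-in (an assumption about its behaviour, not the original) builds a
# graph from a whitespace-separated correlation-matrix file, keeping entries
# above the threshold. Uses nx.from_numpy_array (networkx >= 2.0; older
# releases spell it from_numpy_matrix).
import numpy as np
import networkx as nx

def get_threshold_matrix(filename, threshold):
    corr = np.loadtxt(filename)
    A = (np.abs(corr) > threshold).astype(int)
    np.fill_diagonal(A, 0)   # no self-loops
    return nx.from_numpy_array(A)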
def Girvannewman(G):
    initialcomp = nx.number_connected_components(G)
    totalnumcomp = initialcomp
    # keep removing the highest-betweenness edges until the graph splits
    while totalnumcomp <= initialcomp:
        bw = nx.edge_betweenness_centrality(G)
        maximum_value = max(bw.values())
        for key, value in bw.items():
            if float(value) == maximum_value:
                G.remove_edge(key[0], key[1])
        totalnumcomp = nx.number_connected_components(G)
def convert_to_lineage():
    inf_modes = ['incidence_p', 'incidence_c']
    exits = ['c_to_death', 'remove_s', 'remove_p', 'remove_c']
    parent = "/home/ethan/Dropbox/pkl/"
    index = 0
    for file in os.listdir(parent):
        print file
        infile = open(parent + file, 'r')
        lineage = nx.DiGraph(weighted=True)
        abm = cPickle.load(infile)
        tree = abm.tree
        history = abm.agent_history
        infected = sorted(tree.nodes(), key=lambda x: x.i_time)
        terminal_map = {}
        # drop agents with no recorded history; removing items from a list
        # while iterating over it skips elements, so filter instead
        infected = [i for i in infected if i in history]
        for i in infected:
            out = []
            out.append(i)
            nei = sorted(tree.neighbors(i), key=lambda x: x.i_time)
            for n in nei:
                if n.i_time > i.i_time:
                    out.append(n)
            end_time = 5000
            terminus = Agent()
            terminus.i_time = end_time
            terminus.ID = i.ID
            for event in history[i]:
                if event[0] in exits:
                    terminus.i_time = event[1]
            out.append(terminus)
            terminal_map[i] = terminus
            for x in range(len(out) - 1):
                lineage.add_edge(out[x], out[x + 1],
                                 data=abs(out[x].i_time - out[x + 1].i_time))
        dic = {'lineage': lineage, 'history': history,
               'terminal map': terminal_map}
        out = open(parent + 'lin' + str(index) + '.pkl', 'w')
        cPickle.dump(dic, out)
        print nx.number_connected_components(lineage.to_undirected()), \
            nx.number_connected_components(tree)
        infile.close()
        out.close()
        index += 1
def info(self): print "============================" print nx.info(self.G) print "============================" #print "degree distribution: " #print nx.degree_histogram(self.G) print "============================" print "number of connected components:" if self.directed_graph == False: print nx.number_connected_components(self.G) print "============================"
def CmtyGirvanNewmanStep(G): init_ncomp = nx.number_connected_components(G) #no of components ncomp = init_ncomp while ncomp <= init_ncomp: bw = nx.edge_betweenness_centrality(G, weight='weight') #edge betweenness for G #find the edge with max centrality max_ = max(bw.values()) #find the edge with the highest centrality and remove all of them if there is more than one! for k, v in bw.iteritems(): if float(v) == max_: G.remove_edge(k[0],k[1]) #remove the central edge ncomp = nx.number_connected_components(G) #recalculate the no of components
def getTrafficConnectedComponentGraph(G): H = G.copy() to_remove = [] for (s,d) in H.edges(): if H[s][d]['weight'] <= 2: to_remove.extend([(s,d)]) H.remove_edges_from(to_remove) #print list(networkx.connected_components(H)) print networkx.number_connected_components(H) Gc = max(networkx.connected_component_subgraphs(H), key=len) #drawGraph(Gc, connected=True) return Gc
def IsDivided(fragment): nodes = set() for ring in fragment: nodes |= set(_rings[ring]) G2 = _G.copy() ebunch = [] for i in nodes: for j in _G.neighbors(i): ebunch.append((i,j)) #G2.remove_edges_from(ebunch) G2.remove_nodes_from(nodes) logging.debug("NCOMPO: {0} {1}".format(nx.number_connected_components(G2),_ncompo)) return nx.number_connected_components(G2) > _ncompo
def plot_additional(self, home_nodes, levels=0):
    """Add nodes to existing plot.  Prompt to include link to existing
    if possible.  home_nodes are the nodes to add to the graph"""

    new_nodes = self._neighbors(home_nodes, levels=levels)
    new_nodes = home_nodes.union(new_nodes)

    displayed_data_nodes = set([ v['dataG_id']
                                 for k,v in self.dispG.node.items() ])

    # It is possible the new nodes create a connection with the existing
    #  nodes; in such a case, we don't need to try to find the shortest
    #  path between the two blocks
    current_num_islands = nx.number_connected_components(self.dispG)
    new_num_islands = nx.number_connected_components(
        self.dataG.subgraph(displayed_data_nodes.union(new_nodes)))
    if new_num_islands > current_num_islands:
        # Find the shortest path between the two blocks of the graph and,
        #  if it exists, ask the user if they'd like to include those
        #  nodes in the display as well.

        # First, create a block model of our data graph where what is
        #  currently displayed is a block and the new nodes are a block
        all_nodes = set(self.dataG.nodes())
        singleton_nodes = all_nodes - displayed_data_nodes - new_nodes
        singleton_nodes = map(lambda x: [x], singleton_nodes)
        partitions = [displayed_data_nodes, new_nodes] + \
                     list(singleton_nodes)
        B = nx.blockmodel(self.dataG, partitions, multigraph=True)

        # Find shortest path between existing display (node 0) and
        #  new display island (node 1)
        try:
            path = nx.shortest_path(B, 0, 1)
        except nx.NetworkXNoPath:
            pass
        else:
            ans = tkm.askyesno("Plot path?", "A path exists between the "
                "currently displayed graph and the nodes you've asked to be "
                "added to the display. Would you like to plot that path?")
            if ans:  # Yes to prompt
                # Add the nodes from the source graph which are part of
                #  the path to the new_nodes set
                # Don't include end points because they are the two islands
                for u in path[1:-1]:
                    Gu = B.node[u]['graph'].nodes()
                    assert len(Gu) == 1; Gu = Gu[0]
                    new_nodes.add(Gu)

    # Plot the new nodes
    self._plot_additional(new_nodes)
def _remove_max_edge(G, weight=None): """ Removes edge with the highest value on betweenness centrality. Repeat this step until more connected components than the connected components of the original graph are detected. """ number_components = nx.number_connected_components(G) while nx.number_connected_components(G) <= number_components and G.number_of_edges(): betweenness = nx.edge_betweenness_centrality(G, weight=weight) max_value = max(betweenness.values()) # Use a list of edges because G is changed in the loop for edge in list(G.edges()): if betweenness[edge] == max_value: G.remove_edge(*edge)
def girvan_newman_step(G): ''' INPUT: Graph G OUTPUT: None Run one step of the Girvan-Newman community detection algorithm. Afterwards, the graph will have one more connected component. ''' init_ncomp = nx.number_connected_components(G) ncomp = init_ncomp while ncomp == init_ncomp: bw = Counter(nx.edge_betweenness_centrality(G)) a, b = bw.most_common(1)[0][0] G.remove_edge(a, b) ncomp = nx.number_connected_components(G)
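# Quick smoke test for girvan_newman_step() above (assumes the function and
# its imports, networkx and collections.Counter, are in scope): on a barbell
# graph the single bridge has maximal edge betweenness, so one step splits
# the graph in two.
import networkx as nx

G = nx.barbell_graph(4, 0)
assert nx.number_connected_components(G) == 1
girvan_newman_step(G)
assert nx.number_connected_components(G) == 2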
def sensi_diameter(G):
    """
    Compute graph sensitivity to node removal, in terms of
    the difference in graph diameter on the removal of each
    node in turn.

    This uses local function x_diameter(G), which is modified
    from networkx.diameter(G) to work on XGraphs.

    DL Urban (9 Feb 2007)
    """
    import networkx as nx

    # Starting diameter for full graph:
    if nx.is_connected(G):
        d0 = x_diameter(G)
    else:
        G0 = nx.connected_component_subgraphs(G)[0]  # the largest subgraph
        d0 = x_diameter(G0)

    nc = nx.number_connected_components(G)  # how many are there?

    sensi = {}

    for node in G.nodes():
        ex = list(G.edges(node))   # the edges adjacent to node;
        G.remove_edges_from(ex)    # remove all of these,
        G.remove_node(node)        # and then kill the node, too
        if nx.is_connected(G):
            dx = x_diameter(G)
            cuts = 0
        else:
            Gx = nx.connected_component_subgraphs(G)[0]  # the biggest
            ncx = nx.number_connected_components(G)
            if nc == ncx:
                cuts = 0
            else:
                cuts = 1
            dx = x_diameter(Gx)
        delta = d0 - dx
        G.add_node(node)           # put the node and edges back again
        G.add_edges_from(ex)
        sensi[node] = (cuts, delta)  # a tuple (cuts, delta) per node

    return sensi
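# x_diameter() is a local helper not shown in this file. Under modern
# networkx a minimal stand-in (an assumption, not the original) is simply
# the built-in diameter of a connected (sub)graph:
import networkx as nx

def x_diameter(G):
    # G is assumed connected; callers above pass the largest component
    return nx.diameter(G)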
def get_single_network_measures(G, thr): f = open(out_prfx + 'single_network_measures.dat', 'a') N = nx.number_of_nodes(G) L = nx.number_of_edges(G) D = nx.density(G) cc = nx.average_clustering(G) compon = nx.number_connected_components(G) Con_sub = nx.connected_component_subgraphs(G) values = [] values_2 =[] for node in G: values.append(G.degree(node)) ave_deg = float(sum(values)) / float(N) f.write("%f\t%d\t%f\t%f\t%f\t%f\t" % (thr, L, D, cc, ave_deg, compon)) #1. threshold, 2. edges, 3. density 4.clustering coefficient #5. average degree, 6. number of connected components for i in range(len(Con_sub)): if nx.number_of_nodes(Con_sub[i])>1: values_2.append(nx.average_shortest_path_length(Con_sub[i])) if len(values_2)==0: f.write("0.\n") else: f.write("%f\n" % (sum(values_2)/len(values_2))) #7. shortest pathway f.close()
def print_info(G): #info prints name, type, number of nodes and edges, and average degree already print(nx.info(G)) print "Density: ", nx.density(G) print "Number of connected components: ", nx.number_connected_components(G) all_degree_cent = nx.degree_centrality(G) all_bet_cent = nx.betweenness_centrality(G) all_close_cent = nx.closeness_centrality(G) oldest = [] agerank = 0 names = [] print ("Node, Degree Centrality, Betweenness Centrality, Closeness Centrality:") for x in range(G.number_of_nodes()): names.append(G.nodes(data=True)[x][1]['label']) if G.nodes(data=True)[x][1]['agerank'] >= agerank: if G.nodes(data=True)[x][1]['agerank'] != agerank: oldest = [] agerank = G.nodes(data=True)[x][1]['agerank'] oldest.append(G.nodes(data=True)[x][1]) print G.nodes(data=True)[x][1]['label'],' %.2f' % all_degree_cent.get(x),\ ' %.2f' % all_bet_cent.get(x),\ ' %.2f' % all_close_cent.get(x) print "Oldest facebook(s): ", ', '.join([x['label'] for x in oldest]) return names
def graph_comp_sequence(Gts): import networkx as nx """ Gts is a graph thresholding sequence, a dictionary of graphs keyed by threshold distance, see edge_threshold_sequence(). This function takes that sequence and returns the number of components in each graph, along with the diameter of the largest component in each graph. The output is a dictionary of tuples (NC, D(G)) keyed by threshold distance. Requires: x_diameter(G), local function. Usage: The output is intended to be printed to a file (see write_table.txt for syntax), so that a plot can be constructed that illustrates the number of components and graph diameter as a function of distance. DL Urban (22 Feb 2007) """ seq = Gts.keys() gcs = {} for d in seq: g = Gts[d] if nx.is_connected(g): nc = 1 diam = x_diameter(g) else: nc = nx.number_connected_components(g) # the largest connected component, #0 in the list: gc = nx.connected_component_subgraphs(g)[0] diam = x_diameter(gc) gcs[d] = (nc, diam) return gcs
def main(): parser = argparse.ArgumentParser() parser.add_argument('graph') args = parser.parse_args() #vertices, edges = read_graph_from_file(args.graph) G = nx.read_edgelist(args.graph) n = G.number_of_nodes() print "nodes:", n print "edges:", G.number_of_edges() core_exponent = 0.5 core_vertices = filter(lambda v: G.degree(v) >= n**core_exponent, G.nodes()) print "core vertices:", len(core_vertices) core = G.subgraph(core_vertices) print "number of connected components in core:", nx.number_connected_components(core) # BFS-traversal fringe_fraction = 0.1 max_fringe_size = int(n * fringe_fraction) core_vertices = set(core_vertices) for i in range(int(1/fringe_fraction)+1): fringe_vertices = set(sorted(fringe(G, core_vertices), key=lambda v: -G.degree(v))[:max_fringe_size]) if not fringe_vertices: break print "{}: core={}, fringe={}".format(i+1, len(core_vertices), len(fringe_vertices)) core_vertices |= fringe_vertices
def stats(self):
    '''
    Return all other stats

    Params:
        None
    Returns:
        dictionary of stats with keys (stats supported):
            num_connections - total number of connections
            max_degree - degree of highest degree node
            mean_degree - average degree
            empty - whether the graph has no connections at all
            variance - variance in degree distribution
            odd_length - odd length cycle exist? - Not implemented
            num_connected_components - number of connected components
            any_frac - fraction of nodes with connection(s)
            big_frac - fraction of nodes in the largest connected group
    '''
    output = {}
    degrees = self.degree_dist()
    output['num_connections'] = int(sum([i * degrees[i] for i in degrees]) / 2.0)
    output['max_degree'] = max(degrees.keys())
    output['mean_degree'] = sum([i * degrees[i] for i in degrees]) / float(self.size) if self.size else 0
    output['empty'] = not output['max_degree']
    output['variance'] = sum([degrees[degree] * (degree - output['mean_degree'])**2
                              for degree in degrees]) / float(self.size) if self.size else 0
    # output['odd_length'] = 'Not implemented'
    output['num_connected_components'] = nx.number_connected_components(self.G)
    output['any_frac'] = sum([degrees[i] for i in degrees if i != 0]) / float(self.size) if self.size else 0
    if not output['empty']:
        # size of the largest connected component, not of the whole graph
        num_in_greatest = max(len(c) for c in nx.connected_components(self.G))
        output['big_frac'] = num_in_greatest / float(self.size) if self.size else 0
    else:
        output['big_frac'] = 0
    return output
def graphToCSV(G, graphtype, section, test):
    directory = "Datarows/" + graphtype + "/"
    if not os.path.exists(directory):
        os.makedirs(directory)
    writer_true = csv.writer(open(directory + section + "_true.csv", "a"))
    writer_false = csv.writer(open(directory + section + "_false.csv", "a"))
    A = nx.to_numpy_matrix(G)
    A = np.reshape(A, -1)
    arrGraph = np.squeeze(np.asarray(A))
    nb_nodes = 0
    for node in G:
        if G.degree(node) > 0:
            nb_nodes += 1
    meta_info = [test, nb_nodes, G.number_of_edges(), nx.number_connected_components(G)]
    # Keep the true and false datasets the same size
    if test:
        if os.path.getsize(directory + section + "_true.csv") <= os.path.getsize(directory + section + "_false.csv"):
            writer_true.writerow(np.append(arrGraph, meta_info))
            return True
        else:
            return False
    else:
        if os.path.getsize(directory + section + "_false.csv") <= os.path.getsize(directory + section + "_true.csv"):
            writer_false.writerow(np.append(arrGraph, meta_info))
            return True
        else:
            return False
def genMutants(G, params): """ Returns a list of mutant networks obtained from the given network G, using mutation parameters in params. """ Vcount = len(G) Ecount = len(G.edges()) mutants = [] for i in range(params["mutantsPerEpoch"]): mutantG = G.copy() rewirings = 0 while rewirings <= params["rewiringsPerMutant"]: u, v = mutantG.edges()[random.randrange(Ecount)] uNew = random.choice([u, v]) vNew = random.randrange(Vcount) if uNew == vNew or mutantG.has_edge(uNew, vNew): continue mutantG.remove_edge(u, v) mutantG.add_edge(uNew, vNew) if networkx.number_connected_components(mutantG) > 1: mutantG.remove_edge(uNew, vNew) mutantG.add_edge(u, v) else: rewirings += 1 mutants.append(mutantG) return mutants
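# Illustrative driver for genMutants() above; the parameter values are
# invented. Note the function indexes mutantG.edges() like a list, which
# matches networkx 1.x behaviour (2.x returns a view, so wrap it in list()
# there).
# import networkx, random
# G = networkx.connected_watts_strogatz_graph(30, 4, 0.1)
# params = {"mutantsPerEpoch": 5, "rewiringsPerMutant": 10}
# mutants = genMutants(G, params)
# print(all(networkx.number_connected_components(m) == 1 for m in mutants))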
def constructPedigreeGraphFromOneFile(self, inputFname=None): """ 2013.3.5 replace nx.DiGraph with custom DiGraphWrapper 2012.8.14 """ sys.stderr.write("Constructing pedigree-graph out of %s ..."%(inputFname)) DG=DiGraphWrapper() childNodeSet = set() reader = MatrixFile(inputFname) counter = 0 for row in reader: if counter ==0 and self.p_char.search(row[0]): #character in 1st cell of 1st line, it's header skip. continue childID = row[self.childColumnIndex] DG.add_node(childID) #in case this guy has no parents, then won't be added via add_edge() childNodeSet.add(childID) fatherID = row[self.fatherColumnIndex] if fatherID!='0': DG.add_edge(fatherID, childID) motherID = row[self.motherColumnIndex] if motherID!='0': DG.add_edge(motherID, childID) counter += 1 del reader sys.stderr.write("%s children, %s nodes. %s edges. %s connected components.\n"%(\ len(childNodeSet), DG.number_of_nodes(), DG.number_of_edges(), \ nx.number_connected_components(DG.to_undirected()))) return PassingData(DG=DG, childNodeSet=childNodeSet)
def get_characteristics(G, filename): import networkx as nx print 'calculating characteristics' n_nodes = nx.number_of_nodes(G) n_edges = nx.number_of_edges(G) n_components = nx.number_connected_components(G) print 'number of nodes:', n_nodes print 'number of edges:', n_edges print 'number of components:', n_components print 'degree histogram' check_sum = 0. degree_hist = {} for node in G: if G.degree(node) not in degree_hist: degree_hist[G.degree(node)] = 1 else: degree_hist[G.degree(node)] += 1 keys = degree_hist.keys() keys.sort() for item in keys: print item, degree_hist[item] check_sum += float(degree_hist[item])/float(n_nodes) print "check sum: %f" % check_sum #print 'clustering coefficient' print 'clustering coefficient of full network', nx.average_clustering(G) return 0
def constructPedigreeGraphFromPOEdgeFile(self, inputFname=None): """ 2012.8.23 inputFname is output of vervet/src/pedigree/DiscoverParentOffspringFromPlinkIBD.py """ sys.stderr.write("Constructing pedigree-graph out of %s ..."%(inputFname)) DG=nx.DiGraph() reader = None childNodeSet = set() reader = MatrixFile(inputFname) reader.constructColName2IndexFromHeader() parentIDIndex = reader.getColIndexGivenColHeader("parentID") childIDIndex = reader.getColIndexGivenColHeader("childID") distToPOVectorIndex = reader.getColIndexGivenColHeader("distToPOVector") counter = 0 for row in reader: childID = row[childIDIndex] childNodeSet.add(childID) parentID = row[parentIDIndex] distToPOVector = float(row[distToPOVectorIndex]) DG.add_edge(parentID, childID, weight=distToPOVector) counter += 1 del reader sys.stderr.write("%s children, %s nodes. %s edges. %s connected components.\n"%(\ len(childNodeSet), DG.number_of_nodes(), DG.number_of_edges(), \ nx.number_connected_components(DG.to_undirected()))) return PassingData(DG=DG, childNodeSet=childNodeSet)
def __init__(self, fname, interactive=True): self.fname = fname self.graph = nx.read_gpickle(fname) #apply_workaround(self.graph, thr=1e-3) #remove_intersecting_edges(self.graph) print "Number of connected components:", \ nx.number_connected_components(self.graph) self.selected_path_verts = [] if interactive: self.fig = plt.figure() self.path_patch = None G_p = nx.connected_component_subgraphs(self.graph)[0] #G_p = nx.connected_component_subgraphs(prune_graph(self.graph))[0] plot.draw_leaf(G_p, fixed_width=True) plt.ion() plt.show() self.edit_loop()
def connected_components(self, date, graph): try: self.connected_component_dict[date] = nx.number_connected_components(graph) except: print 'Connected Passed' raise
def computeGenus(Data, gap=2**16): """ Main function to compute the genus given Data (list of links, N x 2 cotacting pairs of genomic positions). Creates a graph G to compute the number of connected components and genus. First consider the case of no coincident ends for loop origins and terminations: Then each end in the link list would split into two, "real" (r) and "ghost" (g) where address of ghost on the real line is greater than address of the "real". Again, in the absence of coincident ends for each link: 1. The left ends "real" node shares an edge to the right end's "ghost" node 2. The left ends "ghost" node shares an edge to the right end's "real" node, exhausting edges correspoding to links 3. Along the real line, only "ghost" nodes connect by edge to "real" nodes, in linear order, and in consecutive pairing along the real line (backbone) 4. Count the number of original loops = P (before creating ghosts). Call it P 5. Count the number of loops (connected components) in the real + ghost graph, call it L 6. genus :math:`g = (P - L)/2` Now coming to resolving coincident ends in a manner that introduces no new crossings and doesn't increase genus: 1. Coincident ends (with n link originating or terminating) will have to be split into n real and n ghost nodes 2. This splitting has to be done in an order such that the splitting itself does not create new link crossings. Need to have a strategy for creating nodes such that points are easily ordered. Strategy: 1. Index all original link ends (nodes of G) by large even integers 2. Create ghosts on large odd numbers 3. Introduce new real nodes for coincident points in between these large even numbers 4. Ghosts' addresses are always s2 (here 1) greater than reals 5. gap/s1 (s1 is an option in splitAndModify() function) is the region within which all coincident ends are resolved, increase it if there are too many coincident ends *Args:* Data: Nx2 link data gap: Gap between addresses of nodes corresponding to the ends of links *Returns:* genus: computed genus G: networkx graph for computing genus LoopData: The list of edges corresponding to mapping of links backboneData: The list of edges corresponding to mapping of connectivity along the genome """ # cleam up operations # step 1: Order, left < right point along rows, #order rows by left and then by right, so that coincident points have increasing right link Data = np.sort( np.asarray(Data), axis=1) #sorted for each row so the left point is lesser than right Data, indx = uniquerows(Data) #clean up operation, will use length of Data # in genus computation, better have all loops to be unique #print Data Data = Data[np.lexsort(( Data[:, 1], Data[:, 0]))] #this soorts data by first column and then by second column G = nx.Graph() points = np.sort(np.unique(Data)) #unique points counter = dict.fromkeys( points, 0) #this is the counter of number of coinident points address = dict(zip(points, np.arange( 0, len(points) * gap, gap))) #initialize dict of address for orignial points, #with the gapped index along the line #print address LoopData = [] #stores the data to plot in the p1 p2 format for p1, p2 in Data: #order the loop data by left, right in chormosome position order if p1 == p2: raise ValueError('Loop cannot be zero length') G, counter, LoopData = splitAndModify(p1, p2, G, address, counter, LoopData, gap=gap) #now run through the points in order of position and introduce the real to ghost backbone edges sorted_graph = sorted(G.nodes(data=True), key=lambda (a, dct): dct['pos']) sorted_names = 
np.asarray([n for n, dct in sorted_graph]) #recall, sorted names are real-ghost pairs backbone_ghosts = sorted_names[1:-1: 2] #the first and the last points always #create a cluster together, sorted_names[1] is the first ghost backbone_reals = sorted_names[2:-1:2] #error handling, check that backbone_reals are all reals, surely then all ghosts are ghosts test_kinds = np.asarray([G.node[n]['kind'] for n in backbone_reals], dtype=str) #print backbone_reals #print backbone_ghosts #print sorted_names assert np.all(test_kinds == 'real' ), 'Fatal Error, graph construction is wrong, change gap?' #these are guranteed to be of equal length, but for sanity, throw error otherwise assert len(backbone_ghosts) == len( backbone_reals), "Creation of ghosts was wrong" backboneData = [] for p1, p2 in zip(backbone_ghosts, backbone_reals): G.add_edge(p1, p2) backboneData.append([p1, p2]) genus = (len(Data) - (nx.number_connected_components(G) - 1)) / 2 return genus, G, LoopData, backboneData
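# Hedged usage sketch for computeGenus() above: in a chord diagram two nested
# links have genus 0 while two crossing links have genus 1, so a sanity check
# (assuming the helpers above are in scope) could look like:
# nested   = [[0, 10], [2, 8]]
# crossing = [[0, 6], [3, 9]]
# print(computeGenus(nested)[0])     # expect 0
# print(computeGenus(crossing)[0])   # expect 1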
def execute(self, simplified=True): """ :return: """ # generate degree sequence self.__compute_degree_sequence() # generate community size dist exp_com_s = self.__compute_community_size_distribution() # assign node to community self.__node_to_community_initial_assignement(exp_com_s) # main loop (iteration) for self.it in tqdm.tqdm(range(0, self.iterations), ncols=100): # community check and event generation comp = nx.number_connected_components(self.graph) if comp <= len(self.communities): if self.__test_communities(): self.__generate_event(simplified) # node removal ar = random.random() if ar < self.del_node: self.__remove_node() # node addition ar = random.random() if ar < self.new_node: self.__add_node() # get nodes within communities that needs to adjust nodes = self.__get_nodes() # inner loop (nodes) for n in nodes: # discard deleted nodes if self.node_to_com[n] == -1: continue # check for decayed edges removal = self.__get_vanished_edges(n) # removal phase for n1 in removal: r = random.random() # edge renewal phase # check for intra/inter renewal thresholds if r <= self.renewal and self.node_to_com[n1] == self.node_to_com[n]\ or r > self.renewal and self.node_to_com[n1] != self.node_to_com[n]: # Exponential decay timeout = (self.it + 1) + int( random.expovariate(self.lambdad)) self.graph.adj[n][n1]["d"] = timeout else: # edge to be removed self.out_interactions.write( "%s\t%s\t-\t%s\t%s\n" % (self.it, self.count, n, n1)) self.graph.remove_edge(n, n1) # expected degree reached if self.graph.degree(n) >= self.exp_node_degs[n]: continue # decide if the node is active during this iteration action = random.random() # the node has not yet reached it expected degree and it acts in this round if self.graph.degree([n])[n] < self.exp_node_degs[n] and ( action <= self.paction or self.it == 0): com_nodes = list(self.communities[self.node_to_com[n]]) # probability for intra/inter community edges r = random.random() # check if at least sigma% of the node link are within the community s = self.graph.subgraph(com_nodes) d = s.degree([n])[n] # Intra community edges if r <= self.sigma and d < len(com_nodes) - 1: self.__new_intra_community_edge(s, n) # inter-community edges elif r > self.sigma: # if self.exp_node_degs[n]-d < (1-self.sigma) * s.number_of_nodes(): self.__new_inter_community_edge(n) self.__output_communities() self.out_events.write( "%s\n\t%s\n" % (self.iterations, self.performed_community_action)) self.out_interactions.flush() self.out_interactions.close() self.out_events.flush() self.out_events.close() return self.stable
def get_basic_information(g): print("'Data Mining Labs' network has {} active members with {} connections between each other."\ .format(g.number_of_nodes(), g.number_of_edges())) print('Number of connected components = {}'.format( nx.number_connected_components(g)))
def graph_exploration(): DG = load_obj('DG_train') print(DG.number_of_nodes(), DG.number_of_edges(), nx.number_connected_components(DG.to_undirected()))
G.add_edge(int(head), int(tail)) time[int(head)] = float(rtt) # get largest component and assign ping times to G0time dictionary Gcc = sorted(nx.connected_components(G), key=len, reverse=True)[0] G0 = G.subgraph(Gcc) G0.rtt = {} for n in G0: G0.rtt[n] = time[n] return G0 G = lanl_graph() print(G) print(nx.number_connected_components(G), "connected components") plt.figure(figsize=(8, 8)) # use graphviz to find radial layout pos = graphviz_layout(G, prog="twopi", root=0) # draw nodes, coloring by rtt ping time options = {"with_labels": False, "alpha": 0.5, "node_size": 15} nx.draw(G, pos, node_color=[G.rtt[v] for v in G], **options) # adjust the plot limits xmax = 1.02 * max(xx for xx, yy in pos.values()) ymax = 1.02 * max(yy for xx, yy in pos.values()) plt.xlim(0, xmax) plt.ylim(0, ymax) plt.show()
def report_on(g_dir, g_file): g = nx.read_graphml(os.path.join(g_dir, g_file)) print('%s,%d,%d,%d' % (utils.extract_filename(g_file), g.number_of_nodes(), g.number_of_edges(), nx.number_connected_components(g)))
def _is_valid(self, i, j, k, l, graph): g = graph.copy() g.remove_edge(i, j) g.remove_edge(k, l) return nx.number_connected_components(g) == 2
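# _is_valid() above answers whether cutting the two edges (i, j) and (k, l)
# splits the graph into exactly two pieces. On a 4-cycle, cutting two
# opposite edges is such a valid 2-cut:
# G = nx.cycle_graph(4)           # edges (0,1), (1,2), (2,3), (3,0)
# self._is_valid(0, 1, 2, 3, G)   # True: components {1, 2} and {0, 3}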
def _create_sorted_contour(yxcontour, center=None, rot=0, N=100, display=False): try: cc = [[1,0],[1,1],[0,1],[-1,1],[-1,0],[-1,-1],[0,-1],[1,-1]] # nearest neighbor graph import networkx as nx dist = cdist(yxcontour,yxcontour) dist[dist >= 2] = 0 G = nx.from_numpy_matrix(dist) # in rare cases there can be several subgraph, the largest is then selected if nx.number_connected_components(G) > 1: GG = [G.subgraph(c) for c in nx.connected_components(G)] G = GG[np.argmax([nx.number_of_nodes(g) for g in GG])] # Let's choose n1->n2, the first edge of the contour # n1 should be in G and the most south-left point Gnodes = np.array(G.nodes()) miny_idx = np.where(yxcontour[Gnodes,0] == np.min(yxcontour[Gnodes,0]))[0] n1 = Gnodes[miny_idx][np.argmin(yxcontour[Gnodes][miny_idx,1])] # n2 is the first neighbor of n1 after direct rotation from [-1,-1], to ensure tracing the outside contour CC = [list(a) for a in np.roll(cc, -cc.index([-1,-1]), axis=0)[1:]] V = [list(yxcontour[ni] - yxcontour[n1]) for ni in G.neighbors(n1)] n2 = list(G.neighbors(n1))[np.argmin([CC.index(vi) for vi in V])] nfirst = n1 nsec = n2 nnnyx = yxcontour[nfirst] - 1 G2 = nx.DiGraph() # directed graph G2.add_node(n1) G2.add_node(n2) G2.add_edge(n1,n2) while(n2 != nfirst): neigh = list(G.neighbors(n2)) neigh.remove(n1) if len(neigh) == 0: tmp=n2 n2=n1 n1=tmp elif len(neigh) == 1: n1=n2 n2=neigh[0] else: v = list(yxcontour[n1] - yxcontour[n2]) CC = [list(a) for a in np.roll(cc, -cc.index(v), axis=0)[1:]] V = [list(yxcontour[ni] - yxcontour[n2]) for ni in neigh] n1 = n2 n2 = neigh[np.argmin([CC.index(vi) for vi in V])] G2.add_node(n1) G2.add_node(n2) G2.add_edge(n1,n2) # in rare cases there can be more than a cycle (=for eg when two cycles connected by a single node), the largest is kept cycles = tuple(nx.simple_cycles(G2)) G3 = nx.DiGraph() nx.add_cycle(G3, cycles[np.argmax([len(cy) for cy in cycles])]) # Selection of nstart in G3: the closest angle to rot given the center rot = convert_angle(rot) # to make sure it is between -pi and +pi G3nodes = np.array(G3.nodes()) trcontour = np.vstack(cart2pol(yxcontour[G3nodes,1] - center[1], yxcontour[G3nodes,0] - center[0])).T idxSortedcontour = np.argsort(trcontour[:,0]) _idx = np.searchsorted(trcontour[:,0], rot, sorter=idxSortedcontour) nstart = G3nodes[idxSortedcontour][_idx if _idx<idxSortedcontour.shape[0] else 0] path = np.array(nx.find_cycle(G3, source=nstart)) # contour starting from the rotated init pathd = [dist[u,v] for (u,v) in path] cumsum = np.copy(pathd) for k in range(1,cumsum.shape[0]): cumsum[k] += cumsum[k-1] short_contour_idx = path[:,0][np.searchsorted(cumsum, np.arange(N)*cumsum[-1]/N)] if display: import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(1, 1, 1) G4 = nx.Graph() nodes = list(range(short_contour_idx.shape[0])) G4.add_nodes_from(nodes) G4.add_edges_from(np.array((nodes,np.roll(nodes, 1))).T) nx.draw(G, yxcontour, node_size=20, node_color ='k', edge_color='k') nx.draw(G3, yxcontour, node_size=20, node_color ='y', edge_color='y', with_labels=True) nx.draw(G4, yxcontour[short_contour_idx], node_size=20, node_color ='g', edge_color='g') plt.plot(yxcontour[0,0], yxcontour[0,1],'ob') plt.plot(yxcontour[nfirst,0], yxcontour[nfirst,1],'dg',ms=20) plt.plot(yxcontour[nsec,0], yxcontour[nsec,1],'Dg',ms=20) plt.plot(nnnyx[0], nnnyx[1],'or',ms=20) plt.plot(yxcontour[path[0,0],0], yxcontour[path[0,0],1],'or') plt.axis('equal') plt.show() plt.close() import pdb pdb.set_trace() return yxcontour[short_contour_idx] except nx.NetworkXError as e: print('utils.py 
- _create_sorted_contour() - nx.NetworkXError: %s' % e) return None except ValueError as e: print('utils.py - _create_sorted_contour() - ValueError : %s' % e) import pdb pdb.set_trace() return None except IndexError as e: print('utils.py - _create_sorted_contour() - IndexError : %s' % e) return None except MemoryError as e: print('utils.py - _create_sorted_contour() - MemoryError : %s' % e) import pdb pdb.set_trace() return None
def main(): # Directed Bison Network bison_file = 'moreno_bison/out.moreno_bison_bison' bison_graph = nx.DiGraph() create_network(bison_graph, bison_file, True) # Undirected Kangaroo Network kangaroo_file = 'moreno_kangaroo/out.moreno_kangaroo_kangaroo' kangaroo_graph = nx.Graph() create_network(kangaroo_graph, kangaroo_file, False) # Part A: Connected Component Analysis # Connected Component Analysis of Bison Directed Graph print("PART A:\n") print("Bison Directed Graph Connected Component Analysis", "\nWeakly connected: ", nx.is_weakly_connected(bison_graph), "\nNumber of Weakly CCs: ", nx.number_weakly_connected_components(bison_graph), "\nSize of largest CC: ", len(max(nx.weakly_connected_components(bison_graph), key=len)), "\nSize of smallest CC: ", len(min(nx.weakly_connected_components(bison_graph), key=len))) # Connected Component Analysis of Kangaroo Undirected Graph print("\nKangaroo Undirected Graph Connected Component Analysis", "\nConnected: ", nx.is_connected(kangaroo_graph), "\nNumber of CCs: ", nx.number_connected_components(kangaroo_graph), "\nSize of largest CC: ", len(max(nx.connected_components(kangaroo_graph), key=len)), "\nSize of smallest CC: ", len(min(nx.connected_components(kangaroo_graph), key=len))) # Part B Computing Degrees and finding the Probability distribution # Creation of an arrayList to store the degree for each node of Bison Network bison_degrees = [] for node in range(1, 26): bison_degrees.append(bison_graph.degree(node)) # Computing Mean and Standard Deviation for Directed x_label = stats(bison_degrees) # Creating a Histogram to plot the data of the degrees Bison Network plt.figure(3) plt.title('Part B: Histogram Directed Bison') plt.xlabel(x_label) plt.hist(bison_degrees, bins='auto') # Creation of an arrayList to store the degree for each node of Kangaroo Network kangaroo_degrees = [] for node in range(1, 17): kangaroo_degrees.append(kangaroo_graph.degree(node)) # Computing Mean and Standard Deviation for Undirected x_label = stats(kangaroo_degrees) # Creating a Histogram to plot the data of the degrees for Kangaroo Network plt.figure(4) plt.title('Part B: Histogram Undirected Kangaroo') plt.xlabel(x_label) plt.hist(kangaroo_degrees, bins='auto') # lt.show() # Part C Find the Path between 2 abritrary vertices in the largest CC # Creating two arbritrary nodes making sure they aren't the same number node1 = random.randrange(1, 27, 1) node2 = random.randrange(1, 27, 1) while node1 == node2: node1 = random.randrange(1, 27, 1) # I put a cutoff on the list of simple paths for now so I can atleast run something # cut off is the act of only focusing on the paths <= 5 # This section creates a list of all simple paths and then creates a list with the lengths of these paths bison_paths = list(nx.all_simple_paths(bison_graph, node1, node2, cutoff=5)) bison_p_lengths = [] for node in range(0, len(bison_paths) - 1): bison_p_lengths.append(len(bison_paths[node])) x_label = stats(bison_p_lengths) # Creating a histogram for the degrees of the graph plt.figure(5) plt.title('Part C: Histogram Directed Bison Paths') plt.xlabel(x_label) plt.hist(bison_p_lengths, bins='auto') # plt.show() # Creating two arbitrary nodes making sure they aren't the same number node1 = random.randrange(1, 17, 1) node2 = random.randrange(1, 17, 1) while node1 == node2: node1 = random.randrange(1, 17, 1) # This section creates a list of all simple paths and then creates a list with the lengths of these paths kangaroo_paths = list( nx.all_simple_paths(kangaroo_graph, node1, node2, cutoff=5)) 
kangaroo_p_lengths = [] for node in range(0, len(kangaroo_paths) - 1): kangaroo_p_lengths.append(len(kangaroo_paths[node])) x_label = stats(kangaroo_p_lengths) # Creating a histogram for the degrees of the graph plt.figure(6) plt.title('Part C: Histogram Undirected Kangaroo Paths') plt.xlabel(x_label) plt.hist(kangaroo_p_lengths, bins='auto') # plt.show() # Part D Find the Simple Circuits between 2 abritrary vertices in the largest CC # UNABLE TO RUN BISON CIRCUITS ON LAPTOP THERE ARE TO MANY AND I CANNOT CREATE A CUTOFF # Creates a list of simple cycles and then creates another list of the lengths of the cycles # bison_circuits = list(nx.simple_cycles(bison_graph)) # bison_c_lengths = [] # for node in range(0,len(bison_circuits)-1): # bison_c_lengths.append(len(bison_circuits[node])) # # x_label = stats(bison_c_lengths) # # plt.figure(7) # plt.title('PART D: Histogram Directed Bison Circuits') # plt.xlabel(x_label) # plt.hist(bison_c_lengths, bins = 'auto') # You can't use the simple cycle function for undirected graphs so I used the basis function. # Creates a list of simple cycles and then creates another list of the lengths of the cycles kangaroo_circuits = nx.cycle_basis(kangaroo_graph) kangaroo_c_lengths = [] for node in range(0, len(kangaroo_circuits) - 1): kangaroo_c_lengths.append(len(kangaroo_circuits[node])) x_label = stats(kangaroo_c_lengths) plt.figure(7) plt.title('PART D: Histogram Undirected Kangaroo Circuits') plt.xlabel(x_label) plt.hist(kangaroo_c_lengths, bins='auto') # plt.show() # Part E Check if Eulerian, Find a Eulerian Path print("\nPART E:") print("\nDirected Bison Graph") print("Euelerian: ", nx.is_eulerian(bison_graph)) print("Has a Eulerian Path: ", nx.has_eulerian_path(bison_graph)) print("\nUndirected Kangaroo Graph") print("Euelerian: ", nx.is_eulerian(kangaroo_graph)) print("Has a Eulerian Path: ", nx.has_eulerian_path(kangaroo_graph)) # Part F: Convert to Matrix. # I don't know if this covers everything? 
bison_matrix = nx.to_numpy_matrix(bison_graph) plt.matshow(bison_matrix) # plt.show() kangaroo_matrix = nx.to_numpy_matrix(kangaroo_graph) plt.matshow(kangaroo_matrix) # plt.show() # Part G: Copy Largest CC comparing it to a copy and a slightly different CC print("\nPart G:\n") # copying the largest connected component from the Bison Directed graph bison_n1 = nx.Graph() largest_cc_bison = list( max(nx.weakly_connected_components(bison_graph), key=len)) for i in largest_cc_bison: bison_n1.add_edge(i, i + 1) bison_n2 = bison_n1.copy() # Checking Equivalence between copied graphs print("Is bison_n1 Equivalent to bison_n2?") compare(bison_n1, bison_n2) # Checking Equivalence between copied graphs but one has an extra 10 edges print("\nIs bison_n1 Equivalent to N3?") bison_n3 = bison_n2.copy() add_10_edges(bison_n3, len(bison_n3)) compare(bison_n1, bison_n3) # Repeat for Kangaroo Undirected Network kangaroo_n1 = nx.Graph() largest_cc_kangaroo = list( max(nx.connected_components(kangaroo_graph), key=len)) for i in largest_cc_kangaroo: kangaroo_n1.add_edge(i, i + 1) kangaroo_n2 = kangaroo_n1.copy() print("\nIs kangaroo_n1 Equivalent to kangaroo_n2?") compare(kangaroo_n1, kangaroo_n2) print("\nIs kangaroo_n1 Equivalent to N3?") kangaroo_n3 = kangaroo_n2.copy() add_10_edges(kangaroo_n3, len(kangaroo_n3)) compare(kangaroo_n1, kangaroo_n3) # Part H: Generate Minimum Spanning Tree print("\nPart H:\n") # Cannot generate SPanning tree for Directed networks # Generating a minimum spanning tree for Undirected network kangaroo_min_tree = nx.minimum_spanning_tree(kangaroo_graph) print( "~A Minimum Spanning Tree was created for the Undirected Kangaroo Graph~" ) tree_or_forest(kangaroo_min_tree) # Finding two random nodes that are connected x = 0 y = 0 while (not (kangaroo_min_tree.has_edge(x, y))): x = random.randrange(1, 17, 1) y = random.randrange(1, 17, 1) while x == y: x = random.randrange(1, 17, 1) # Removing the found edge print("\nAn edge from the spanning tree was removed") kangaroo_min_tree.remove_edge(x, y) tree_or_forest(kangaroo_min_tree) # Part I: Dijkstra's Algorithm bison_pairs = list(nx.all_pairs_node_connectivity(bison_graph)) connected_nodes = [] for i in bison_pairs: for j in bison_pairs: if bison_graph.has_edge(i, j + 1): connected_nodes.append([i, j + 1]) dijkstra_paths = [] length = len(connected_nodes) for i in range(0, length - 1): for j in range(0, 1): dijkstra_paths.append( int( nx.dijkstra_path_length(bison_graph, connected_nodes[i][j], connected_nodes[i][j + 1]))) x_label = stats(dijkstra_paths) plt.figure() plt.xlabel(x_label) plt.title('Directed Bison Dijkstra Path Lengths') plt.hist(dijkstra_paths) # plt.show() #Created a new temporary graph with edges from the connected nodes and weights from the distance list temp_bison = nx.DiGraph() for i in range(0, length - 1): j = 0 temp_bison.add_edge(connected_nodes[i][j], connected_nodes[i][j + 1], weight=dijkstra_paths[i]) # I dont really know if this creates a matrix for the weigths this is just what i did in a previous part bison_distance_matrix = nx.to_numpy_matrix(temp_bison) plt.matshow(bison_distance_matrix) plt.show() # Repeat for Kangaroo Undirected KangarooPairs = list(nx.all_pairs_node_connectivity(KangarooGraph)) ConnectedNodesK = [] for i in KangarooPairs: for j in KangarooPairs: if KangarooGraph.has_edge(i, j + 1): ConnectedNodesK.append([i, j + 1]) dijkstra_PathsK = [] length = len(ConnectedNodesK) for i in range(0, length): dijkstra_PathsK.append( int( nx.dijkstra_path_length(KangarooGraph, ConnectedNodesK[i][0], 
ConnectedNodesK[i][1])))

    x_label = stats(dijkstra_PathsK)
    plt.figure()
    plt.xlabel(x_label)
    plt.title('Undirected Kangaroo Dijkstra Path Lengths')
    plt.hist(dijkstra_PathsK)
    plt.show()

    temp_kangaroo = nx.Graph()
    for i in range(0, length - 1):
        j = 0
        temp_kangaroo.add_edge(ConnectedNodesK[i][j],
                               ConnectedNodesK[i][j + 1],
                               weight=dijkstra_PathsK[i])

    kangaroo_distance_matrix = nx.to_numpy_matrix(temp_kangaroo)
    plt.matshow(kangaroo_distance_matrix)
    plt.show()
def compute_plotmountains(G, sname): # print graphname orig_core_nums = nx.core_number(G) print G.number_of_nodes(), G.number_of_edges(), max( orig_core_nums.values()) print 'core nos computed' # Initializing node_CNdrops_mountainassignment # 'node_CNdrops_mountainassignment' is a dict where keys are nodeIDS # Each value is tuple of the maximum drop in core number observed for this node and the mountain to which it is assigned. node_CNdrops_mountainassignment = {} for n in G.nodes(): node_CNdrops_mountainassignment[n] = [ 0, -1 ] #diff in core number, assignment to a mountain H = G.copy() H_nodes = set(G.nodes()) current_core_nums = orig_core_nums.copy() current_d = max(current_core_nums.values()) print 'current_d = ', current_d # 'current_plotmountain_id' keeps track of numbering of the plot-mountains current_plotmountain_id = 0 peak_numbers = {} # Each iteration of the while loop finds a k-contour while (len(H.nodes()) > 0): # degen_core is the degeneracy of the graph degen_core = nx.k_core(H) # Degen-core # Note that the actual mountains may consist of multiple components. # To compute their core-periphery values or to analyze each component, # use the following line to find the components res_core_comps = nx.connected_component_subgraphs( degen_core) #The comps in Degen-core print('components', nx.number_connected_components(degen_core), len(degen_core)) # But in the mountain plot we plot the separate components related to a k-contour as a single mountain. # So, ignore the components for making mountain plots # Nodes in the k-contour. Their current core number is their peak number. for comp in res_core_comps: #kcontour_nodes = degen_core.nodes() kcontour_nodes = comp.nodes() for n in kcontour_nodes: peak_numbers[n] = current_core_nums[n] # Removing the kcontour (i.e. degeneracy) and re-computing core numbers. H_nodes = H_nodes.difference(set(kcontour_nodes)) H = G.subgraph(list(H_nodes)) new_core_nums = nx.core_number(H) for n in kcontour_nodes: # For the nodes in kcontour, its removal causes its core number to drop to 0. # Checking is this drop is greater than the drop in core number observed for these nodes in previous iterations if current_core_nums[n] - 0 > node_CNdrops_mountainassignment[ n][0]: node_CNdrops_mountainassignment[n][0] = current_core_nums[ n] node_CNdrops_mountainassignment[n][ 1] = current_plotmountain_id for n in new_core_nums: # Checking is this drop is greater than the drop in core number observed for these nodes in previous iterations if current_core_nums[n] - new_core_nums[ n] > node_CNdrops_mountainassignment[n][0]: node_CNdrops_mountainassignment[n][ 0] = current_core_nums[n] - new_core_nums[n] node_CNdrops_mountainassignment[n][ 1] = current_plotmountain_id current_plotmountain_id += 1 current_core_nums = new_core_nums.copy() print 'peak nos computed' plot_mountains(node_CNdrops_mountainassignment, orig_core_nums, peak_numbers, G, sname)
def mask_test_edges(adj, test_frac=.1, val_frac=.05, prevent_disconnect=True, verbose=False): # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper. # Remove diagonal elements adj = adj - sp.dia_matrix( (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape) adj.eliminate_zeros() # Check that diag is zero: assert np.diag(adj.todense()).sum() == 0 g = nx.from_scipy_sparse_matrix(adj) orig_num_cc = nx.number_connected_components(g) adj_triu = sp.triu(adj) # upper triangular portion of adj matrix adj_tuple = sparse_to_tuple( adj_triu) # (coords, values, shape), edges only 1 way edges = adj_tuple[0] # all edges, listed only once (not 2 ways) # edges_all = sparse_to_tuple(adj)[0] # ALL edges (includes both ways) num_test = int( np.floor(edges.shape[0] * test_frac)) # controls how large the test set should be num_val = int( np.floor(edges.shape[0] * val_frac)) # controls how alrge the validation set should be # Store edges in list of ordered tuples (node1, node2) where node1 < node2 edge_tuples = [(min(edge[0], edge[1]), max(edge[0], edge[1])) for edge in edges] all_edge_tuples = set(edge_tuples) train_edges = set(edge_tuples) # initialize train_edges to have all edges test_edges = set() val_edges = set() # Iterate over shuffled edges, add to train/val sets np.random.shuffle(edge_tuples) counter = 0 for edge in edge_tuples: counter += 1 if counter % 100 == 0: print("processed:" + str(counter)) # print edge node1 = edge[0] node2 = edge[1] # If removing edge would disconnect a connected component, backtrack and move on g.remove_edge(node1, node2) if prevent_disconnect == True: if nx.number_connected_components(g) > orig_num_cc: g.add_edge(node1, node2) continue # Fill test_edges first if len(test_edges) < num_test: test_edges.add(edge) train_edges.remove(edge) # Then, fill val_edges elif len(val_edges) < num_val: val_edges.add(edge) train_edges.remove(edge) # Both edge lists full --> break loop elif len(test_edges) == num_test and len(val_edges) == num_val: break if (len(val_edges) < num_val or len(test_edges) < num_test): print( "WARNING: not enough removable edges to perform full train-test split!" ) print("Num. (test, val) edges requested: (", num_test, ", ", num_val, ")") print("Num. 
(test, val) edges returned: (", len(test_edges), ", ", len(val_edges), ")") if prevent_disconnect == True: assert nx.number_connected_components(g) == orig_num_cc if verbose == True: print('creating false test edges...') test_edges_false = set() while len(test_edges_false) < num_test: idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue false_edge = (min(idx_i, idx_j), max(idx_i, idx_j)) # Make sure false_edge not an actual edge, and not a repeat if false_edge in all_edge_tuples: continue if false_edge in test_edges_false: continue test_edges_false.add(false_edge) if verbose == True: print('creating false val edges...') val_edges_false = set() while len(val_edges_false) < num_val: idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue false_edge = (min(idx_i, idx_j), max(idx_i, idx_j)) # Make sure false_edge in not an actual edge, not in test_edges_false, not a repeat if false_edge in all_edge_tuples or false_edge in test_edges_false or false_edge in val_edges_false: continue val_edges_false.add(false_edge) if verbose == True: print('creating false train edges...') train_edges_false = set() while len(train_edges_false) < len(train_edges): idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue false_edge = (min(idx_i, idx_j), max(idx_i, idx_j)) # Make sure false_edge in not an actual edge, not in test_edges_false, # not in val_edges_false, not a repeat if false_edge in all_edge_tuples or false_edge in test_edges_false or false_edge in val_edges_false or false_edge in train_edges_false: continue train_edges_false.add(false_edge) if verbose == True: print('final checks for disjointness...') # assert: false_edges are actually false (not in all_edge_tuples) assert test_edges_false.isdisjoint(all_edge_tuples) assert val_edges_false.isdisjoint(all_edge_tuples) assert train_edges_false.isdisjoint(all_edge_tuples) # assert: test, val, train false edges disjoint assert test_edges_false.isdisjoint(val_edges_false) assert test_edges_false.isdisjoint(train_edges_false) assert val_edges_false.isdisjoint(train_edges_false) # assert: test, val, train positive edges disjoint assert val_edges.isdisjoint(train_edges) assert test_edges.isdisjoint(train_edges) assert val_edges.isdisjoint(test_edges) if verbose == True: print('creating adj_train...') # Re-build adj matrix using remaining graph adj_train = nx.adjacency_matrix(g) # Convert edge-lists to numpy arrays train_edges = np.array([list(edge_tuple) for edge_tuple in train_edges]) train_edges_false = np.array( [list(edge_tuple) for edge_tuple in train_edges_false]) val_edges = np.array([list(edge_tuple) for edge_tuple in val_edges]) val_edges_false = np.array( [list(edge_tuple) for edge_tuple in val_edges_false]) test_edges = np.array([list(edge_tuple) for edge_tuple in test_edges]) test_edges_false = np.array( [list(edge_tuple) for edge_tuple in test_edges_false]) # NOTE: these edge lists only contain single direction of edge! return adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false
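# Minimal driver for mask_test_edges() above, a sketch only: it assumes a
# connected graph large enough that the test/val edges can be removed
# without disconnecting it.
# import networkx as nx
# g = nx.connected_watts_strogatz_graph(200, 6, 0.1)
# adj = nx.adjacency_matrix(g)
# adj_train, train_e, train_f, val_e, val_f, test_e, test_f = \
#     mask_test_edges(adj, test_frac=0.1, val_frac=0.05, verbose=True)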
import networkx from networkx.drawing.nx_pydot import write_dot with open('day12-input.txt') as f: lines = f.readlines() graph = networkx.Graph() for line in lines: line = str.replace(line, ' ', '') node, neighbours = line.strip().split('<->') for neighbour in neighbours.split(','): graph.add_edge(node, neighbour) write_dot(graph, 'day12-graph.dot') print(len(networkx.node_connected_component(graph, '0'))) print(networkx.number_connected_components(graph))
def words_graph():
    """Return the words example graph from the Stanford GraphBase"""
    fh = gzip.open('words4_dat.txt.gz', 'r')
    words = set()
    for line in fh.readlines():
        line = line.decode()
        if line.startswith('*'):
            continue
        w = str(line[0:4])
        words.add(w)
    return generate_graph(words)


if __name__ == '__main__':
    G = words_graph()
    print("Loaded words4_dat.txt containing four-letter English words.")
    print("Two words are connected if they differ in one letter.")
    print("Graph has %d nodes with %d edges"
          % (nx.number_of_nodes(G), nx.number_of_edges(G)))
    print("%d connected components" % nx.number_connected_components(G))

    for (source, target) in [('cold', 'warm'), ('love', 'hate')]:
        print("Shortest path between %s and %s is" % (source, target))
        try:
            sp = nx.shortest_path(G, source, target)
            for n in sp:
                print(n)
        except nx.NetworkXNoPath:
            print("None")
# libraries
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

data = pd.read_csv('../data/edges_with_weight/edges_with_weight.csv', ';')
print(data)

# Build your graph. Note that we use the DiGraph function to create a directed graph!
G = nx.from_pandas_edgelist(data, 'src', 'dest', create_using=nx.DiGraph(), edge_attr='weight')

#plt.figure(1, figsize=(10,10))
# Make the graph
#nx.draw(G, with_labels=False, node_size=100, alpha=0.6, arrows=True, font_size=8, pos=nx.kamada_kawai_layout(G))
#plt.show()

# Rebuild as an undirected graph: connected components are only defined
# for undirected graphs.
G = nx.from_pandas_edgelist(data, 'src', 'dest', create_using=nx.Graph(), edge_attr='weight')
print(nx.number_connected_components(G))
def graphInfo(graph, weighted=2, path_lengths=False, nodeInfo=False, edgeInfo=False): """ Give a Basic Analysis of the Graph weighted = {0:"only unweighted",1:"only weighted",else:"both weighted and unweighted"} path_lengths = {True:""} """ graph_info = {} nfnodes = graph.number_of_nodes() nfedges = graph.number_of_edges() nfComponents = nx.number_connected_components(graph) density = nx.density(graph) graph_info = { "nfnodes": nfnodes, "nfedges": nfedges, "nfComponents": nfComponents, "density": density } if weighted == 0: unweighted_size = graph.size(weight=None) graph_info['unweighted_size'] = unweighted_size elif weighted == 1: weighted_size = graph.size(weight="weight") graph_info['weighted_size'] = weighted_size else: unweighted_size = graph.size(weight=None) weighted_size = graph.size(weight="weight") graph_info['unweighted_size'] = unweighted_size graph_info['weighted_size'] = weighted_size max_unweighted_node_degree = 0 max_weighted_node_degree = 0 if weighted == 0 and nodeInfo == True: sorted_nodes_by_unweighted_degree = sortNodesByDegree(graph, weight=None, reverse=True) if nfnodes >= 2: max_unweighted_node_degree = sorted_nodes_by_unweighted_degree[0] graph_info[ 'max_unweighted_node_degree'] = max_unweighted_node_degree elif weighted == 1: if nfedges > 1 and edgeInfo == True: sorted_edges = sortEdgesByWeight(graph) max_edge_weight = None max_edge_weight = sorted_edges[0] graph_info['max_edge_weight'] = max_edge_weight if nfnodes >= 2 and nodeInfo == True: sorted_nodes_by_weighted_degree = sortNodesByDegree( graph, weight="weight", reverse=True) max_weighted_node_degree = sorted_nodes_by_weighted_degree[0] graph_info['max_weighted_node_degree'] = max_weighted_node_degree elif weighted == 2: sorted_edges = sortEdgesByWeight(graph) sorted_nodes_by_weighted_degree = sortNodesByDegree(graph, weight="weight", reverse=True) sorted_nodes_by_unweighted_degree = sortNodesByDegree(graph, weight=None, reverse=True) max_edge_weight = None if nfedges > 1 and edgeInfo == True: max_edge_weight = sorted_edges[0] graph_info['max_edge_weight'] = max_edge_weight if nfnodes >= 2 and nodeInfo == True: max_unweighted_node_degree = sorted_nodes_by_unweighted_degree[0] max_weighted_node_degree = sorted_nodes_by_weighted_degree[0] graph_info[ 'max_unweighted_node_degree'] = max_unweighted_node_degree graph_info['max_weighted_node_degree'] = max_weighted_node_degree weighted_avg_path_length = math.inf unweighted_avg_path_length = math.inf if nfComponents == 1 and path_lengths == True: weighted_avg_path_length = nx.average_shortest_path_length( graph, weight="weight") unweighted_avg_path_length = nx.average_shortest_path_length( graph, weight=None) graph_info['weighted_avg_path_length'] = weighted_avg_path_length graph_info['unweighted_avg_path_length'] = unweighted_avg_path_length return graph_info
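# Example call for graphInfo() above (assumes the sortNodesByDegree and
# sortEdgesByWeight helpers it references are defined alongside it, and that
# `math` is imported for math.inf):
# import networkx as nx
# g = nx.les_miserables_graph()   # edges carry a 'weight' attribute
# info = graphInfo(g, weighted=2, path_lengths=True)
# print(info['nfnodes'], info['nfComponents'], info['weighted_avg_path_length'])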
        self.trainNodes = np.arange(min(testIdx))
        self.trainNodesWithLabel = np.arange(len(x))

if __name__ == "__main__":
    cora = dataGenerator("cora")
    graph = nx.Graph()
    graph.add_nodes_from([1, 2, 7, 4, 5])
    graph.add_edges_from([(1, 2), (1, 4), (1, 7), (1, 5), (4, 7), (4, 5)])
    graph.add_nodes_from([6, 8])
    graph.add_edge(6, 8)
    print(nx.number_connected_components(graph))  # 2
    graph = graph.subgraph([1, 2, 4, 5])
    nx.draw(graph, with_labels=True, font_weight='bold')
    # nx.draw_shell(graph)
    plt.show()
def main(argv):
    """main function"""
    global g_name, d_component, n, k, T_max, svd_routine, svd_parameter, seed, max_n, J0, J2, bias, beta, dynamic_type
    # NOTE: d, Mmin, Mmax and T are module-level globals in the original
    # script, as are the helper modules tl_main, tl_dyn, tl_obs and otu.
    H = nx.Graph()
    try:
        opts, args = getopt.getopt(argv, "hg:d:n:k:t:s:p:i:m:J:j:b:B:e:", "help")
    except getopt.GetoptError:
        tl_main.usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            tl_main.usage()
            sys.exit()
        elif opt == '-d':
            d_component = int(arg)
        elif opt == '-n':
            n = int(arg)
        elif opt == '-k':
            k = int(arg)
        elif opt == '-t':
            T_max = int(arg)
        elif opt == '-s':
            svd_routine = int(arg)
        elif opt == '-p':
            svd_parameter = int(arg)
        elif opt == '-i':
            seed = int(arg)
        elif opt == '-m':
            max_n = int(arg)
        elif opt == '-J':
            J0 = float(arg)
        elif opt == '-j':
            J2 = float(arg) / float(n)
        elif opt == '-b':
            bias = float(arg)
        elif opt == '-B':
            beta = float(arg)
        elif opt == '-e':
            dynamic_type = int(arg)
    p = float(k) / float(n)
    if svd_routine == 1:
        svd_parameter = 10.0 ** (-svd_parameter)
    print('svd_parameter', svd_parameter)

    # GENERATE GRAPH
    H = nx.random_regular_graph(k, n, seed)
    g_name = 'reg'
    out_dir = '../data/'
    stem = out_dir + g_name + str(n) + 'k' + str(k) + 's' + str(svd_routine) + str(svd_parameter)
    if dynamic_type == 1:
        tail = 'e' + str(dynamic_type) + 'b' + str(bias) + 'B' + str(beta)
        histo_file = open(stem + tail + 'histo.dat', 'w')
    elif dynamic_type == 0:
        tail = 'e' + str(dynamic_type) + 'b' + str(bias)
    outfile = open(stem + tail + 'dyn.dat', 'w')
    out_svd = open(stem + tail + 'svd.dat', 'w')
    out_parameters = open(out_dir + 'parameters_' + g_name + str(n) + 'k' + str(k) +
                          's' + str(svd_routine) + str(svd_parameter) + tail + 'parameters.dat', 'w')

    print(H, n, k, g_name)
    if d_component == 1:
        print('Connected components', nx.number_connected_components(H))
        # Keep only the giant component (the largest connected component)
        H = H.subgraph(max(nx.connected_components(H), key=len)).copy()
        H = nx.convert_node_labels_to_integers(H)
    G = nx.DiGraph(H)
    if dynamic_type == 1:
        sumJ = tl_dyn.assign_J(J0, J2, G, histo_file)  # fix disorder for Glauber dynamics
    tl_main.output_parameters(out_parameters, G, d_component, n, k, T_max, svd_routine,
                              svd_parameter, seed, max_n, bias, dynamic_type, beta, J0, J2)
    out_parameters.close()

    degree_sequence = sorted((deg for _, deg in G.degree()), reverse=True)  # degree sequence
    dmax = max(degree_sequence) // 2 if nx.is_directed(G) else max(degree_sequence)
    print('k_max=', dmax)
    print('J2,J0 ', J2, J0)

    # Initial probability distribution -------
    P = np.array([[0.5 - bias, 0.5 + bias] for u in G.nodes()])

    # INITIALIZE observables ---------------------------
    m = [0.0 for t in range(T_max)]    # magnetization
    q = [0.0 for t in range(T_max)]    # EA parameter (overlap)
    Cor = [0.0 for t in range(T_max)]  # spin-spin correlation
    Z = G.number_of_nodes()
    if dynamic_type == 0:
        for u in G.nodes():
            G.nodes[u]['w'] = tl_dyn.w_majority(u, d, G)
            m[0] += P[u][1] - P[u][0]
            Z += 1
            q[0] += (P[u][1] - P[u][0]) * (P[u][1] - P[u][0])
        m[0] /= float(Z)
        q[0] /= float(Z)
    elif dynamic_type == 1:
        for u in G.nodes():
            J = [G[u][v]['J'] for v in G.neighbors(u)]
            G.nodes[u]['w'] = tl_dyn.w_glauber(u, d, G, beta, J)
            m[0] += P[u][1] - P[u][0]
            Z += 1
            q[0] += (P[u][1] - P[u][0]) * (P[u][1] - P[u][0])
        m[0] /= float(Z)
        q[0] /= float(Z)
        Z = 0.0
    Cor[0] = 0.0  # <== we start from a factorized initial condition

    # INITIALIZE A's, M's, C's
    for u, v in G.edges():
        M = [random.randrange(Mmin, Mmax) for t in range(T + 2)]  # messages i --> j
        M[0] = 1
        M[T] = 1
        # If T=0 we just need to initialize A(0) and A(1)
        G[u][v]['As'] = [np.random.rand(d, M[0], 1)]
        G[u][v]['As'].append(np.random.rand(d, 1, 1, M[T]))
        G[u][v]['OldA'] = [np.random.rand(d, M[0], 1)]
        G[u][v]['As'][0][0, :, :] = 1.  # P[v][0]
        G[u][v]['As'][0][1, :, :] = 1.  # P[v][1]
        G[u][v]['As'][1][0, :, :, :] = P[u][0]
        G[u][v]['As'][1][1, :, :, :] = P[u][1]
        G[u][v]['OldA'] = G[u][v]['As'][0]
    for u in G.nodes():
        G.nodes[u]['marginal'] = P[u]

    sv_ratio = 1.0
    norm_ratio = 0.0
    t0 = time.time()
    t1 = t0
    for t in range(T, T_max):
        print(t, tl_obs.max_dimM(G), sv_ratio, norm_ratio, str(t1 - t0), file=out_svd)
        out_svd.flush()
        if t > 0:
            out_mag = open(stem + 't' + str(t) + 'b' + str(bias) + 'B' + str(beta) + 'mag.dat', 'w')
            out_cor = open(stem + 't' + str(t) + 'b' + str(bias) + 'B' + str(beta) + 'corr.dat', 'w')
            Cor[t] = tl_obs.calulate_marginals(G, d, out_mag, out_cor)
            out_mag.close()
            out_cor.close()
        t1 = time.time()
        m[t], q[t] = tl_obs.calulate_observable(G, d)
        sv_ratio, norm_ratio = otu.update(dynamic_type, beta, G, d, t, P,
                                          svd_routine=svd_routine,
                                          svd_threshold=svd_parameter, max_n=max_n)
        t2 = time.time()
        time_diff = round(t2 - t1)
        print(t, m[t], q[t], Cor[t], str(t1 - t0), file=outfile)
        outfile.flush()
        print('t = ', t, ' calculated in ', time_diff, 's')
        print(' <m>= ', m[t], ' q=', q[t], 'Cor=', Cor[t], end=' ')
        print('maxMdim=', tl_obs.max_dimM(G), ' sv_ratio=', sv_ratio, ' norm_ratio=', norm_ratio)
    # END CYCLE OVER t

    time_diff = round(time.time() - t0)
    minutes = time_diff // 60
    seconds = time_diff % 60  # same as time_diff - (minutes * 60)
    print('Total time=', minutes, 'm and', seconds, 's')
    outfile.close()
    out_svd.close()
def analysis(graph, prefix, flag):
    degree_centrality = sorted(nx.degree_centrality(graph).items(),
                               key=lambda x: x[1], reverse=True)
    betweenness_centrality = sorted(nx.betweenness_centrality(graph).items(),
                                    key=lambda x: x[1], reverse=True)
    closeness_centrality = sorted(nx.closeness_centrality(graph).items(),
                                  key=lambda x: x[1], reverse=True)
    eigenvector_centrality = sorted(nx.eigenvector_centrality_numpy(graph).items(),
                                    key=lambda x: x[1], reverse=True)
    clustering = [(k, v) for k, v in nx.clustering(graph).items()]
    average_degree_connectivity = sorted(
        (k, v) for k, v in nx.average_degree_connectivity(graph).items())
    average_neighbor_degree = sorted(nx.average_neighbor_degree(graph).items(),
                                     key=lambda x: x[1], reverse=True)

    print(prefix + " connected components = " + str(nx.number_connected_components(graph)))
    print(prefix + " degree assortativity coefficient = " + str(nx.degree_assortativity_coefficient(graph)))
    print(prefix + " density = " + str(nx.density(graph)))

    # distance metrics (diameter, radius, ...) are only defined per connected component
    cnt = 0
    for c in nx.connected_components(graph):
        subG = nx.subgraph(graph, c)
        print(type(subG))
        print(c)
        eccentricity = sorted(nx.eccentricity(subG).items(), key=lambda x: x[1], reverse=True)
        create_excel(subG, eccentricity, 'Ime', 'Ekscentricnost',
                     prefix + '_comp' + str(cnt) + '_eccentricity.xlsx', flag)
        print(prefix + " comp" + str(cnt) + " diameter = " + str(nx.diameter(subG)))
        print(prefix + " comp" + str(cnt) + " radius = " + str(nx.radius(subG)))
        print(prefix + " comp" + str(cnt) + " center = " + str(nx.center(subG)))
        print(prefix + " comp" + str(cnt) + " average shortest path length = " +
              str(nx.average_shortest_path_length(subG)))
        cnt += 1

    create_excel(graph, degree_centrality, 'Ime', 'Centralnost po stepenu',
                 prefix + '_degree_centrality.xlsx', flag)
    create_excel(graph, betweenness_centrality, 'Ime', 'Relaciona Centralnost',
                 prefix + '_betweenness_centrality.xlsx', flag)
    create_excel(graph, closeness_centrality, 'Ime', 'Centralnost po bliskosti',
                 prefix + '_closeness_centrality.xlsx', flag)
    create_excel(graph, eigenvector_centrality, 'Ime', 'Eigenvector centralnost',
                 prefix + '_eigenvector_centrality.xlsx', flag)
    create_excel(graph, clustering, 'Ime', 'Faktor klasterizacije',
                 prefix + '_clustering.xlsx', flag)
    create_excel(graph, average_degree_connectivity, 'Stepen', 'Prosecan stepen suseda',
                 prefix + '_average_degree_connectivity.xlsx', False)
    create_excel(graph, average_neighbor_degree, 'Ime', 'Stepen suseda',
                 prefix + '_average_neighbor_degree.xlsx', flag)
# (fragment: inside an outer loop over simulation runs)
D = {}
S = [len(S_init)]
I = [len(I_init)]
R = [len(R_init)]
a = np.zeros(20)
b = np.zeros(20)
c = np.zeros(20)
e = np.zeros(20)
Ro = np.zeros(20)
S_last_20 = np.zeros(20)
I_last_20 = np.zeros(20)
R_last_20 = np.zeros(20)
G = graph_generate(p)
count = count + 1
for t in range(len(time)):
    # stop this run if the graph has become disconnected
    dc = nx.number_connected_components(G)
    if dc > 1:
        print("break")
        break
    delta_I = 0
    delta_R = 0
    # label each node with its current compartment: 0 = S, 1 = I, 2 = R
    for i in range(len(G.nodes())):
        node = list(G.nodes())[i]
        if node in S_init:
            D[node] = 0
        if node in I_init:
            D[node] = 1
        if node in R_init:
            D[node] = 2
    Suscept_count = len(S_init)
    Infect_count = len(I_init)
def test_number_weakly_connected_components(self):
    # weakly connected components of a digraph equal the connected
    # components of its undirected copy
    for G, C in self.gc:
        U = G.to_undirected()
        w = nx.number_weakly_connected_components(G)
        c = nx.number_connected_components(U)
        assert w == c
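# Worked example (added): a tiny digraph illustrating the invariant the test
# above checks; edge direction is irrelevant to weak connectivity.
import networkx as nx

D = nx.DiGraph([(1, 2), (3, 4)])
assert nx.number_weakly_connected_components(D) == \
       nx.number_connected_components(D.to_undirected())  # both are 2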
def scc_nh_subgraph(m, n):
    # Number of connected components in the subgraph induced by the union
    # of the neighborhoods of m and n (G is a module-level graph).
    return nx.number_connected_components(
        G.subgraph(list(set(G.neighbors(m)).union(set(G.neighbors(n))))))
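# Usage sketch (added): with a module-level path graph 0-1-2-3-4, the
# neighborhoods of 0 and 4 are {1} and {3}; their union induces an edgeless
# subgraph, so the function reports two components.
G = nx.path_graph(5)
print(scc_nh_subgraph(0, 4))  # 2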
def Mccabe_Complexity(graph):
    # Cyclomatic complexity M = E - N + 2P, where P is the number of
    # connected components of the control-flow graph.
    return (graph.number_of_edges() - graph.number_of_nodes()
            + 2 * nx.number_connected_components(graph))
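# Worked example (added): for the control-flow graph of a function with a
# single if/else branch, E = 5, N = 5, P = 1, so M = 5 - 5 + 2 = 2, i.e.
# two linearly independent paths.
import networkx as nx

cfg = nx.Graph([('entry', 'cond'), ('cond', 'then'), ('cond', 'else'),
                ('then', 'exit'), ('else', 'exit')])
print(Mccabe_Complexity(cfg))  # 2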
def calculate_modularity(part_G, orig_G, m):
    # Modularity contribution of one community:
    # (edges inside the part) / m - (sum of original degrees / 2m)^2
    E = part_G.number_of_edges()
    s = sum(orig_G.degree(i) for i in part_G.nodes())
    return E / m - (s / (2 * m)) ** 2

for i in range(1, 6):
    print(i, " connected part")
    M = g.number_of_edges()
    parts = [g.subgraph(c) for c in nx.connected_components(g)]
    total = 0
    for item in parts:
        print(item.nodes())
        total += calculate_modularity(item, g, M)
    print("Modularity score", total)
    print("edges removed:", startEdges - M)
    if i != 5:
        # Girvan-Newman step: remove highest-betweenness edges until the
        # graph splits into one more component
        while nx.number_connected_components(g) == i:
            b = approximate_calculate_edge_betweenness(g)
            maxedge = max(b, key=b.get)
            g.remove_edge(*maxedge)
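# Usage sketch (added): approximate_calculate_edge_betweenness is defined
# elsewhere in the original script; networkx's exact edge betweenness is a
# reasonable stand-in for experimenting with the loop above (define it, and
# g/startEdges, before the loop runs).
import networkx as nx

def approximate_calculate_edge_betweenness(graph):
    return nx.edge_betweenness_centrality(graph)

# g = nx.karate_club_graph(); startEdges = g.number_of_edges()
# At i = 2 the classic two-faction split of the karate club emerges.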
def discoverAssociationLocus(self, associationPeakGraph=None, min_overlap_ratio=0.1):
    """
    2012.12.12 output the peaks that are associated with one locus.
        For each peak, output: result-id, phenotype id, chromosome, start,
        stop, start_locus, stop_locus, no_of_loci, peak_locus, peak-score.
    2012.11.20
    2012.6.24
    """
    sys.stderr.write("Discovering association loci from graph of %s nodes, %s edges, %s connected components..." %
                     (associationPeakGraph.number_of_nodes(), associationPeakGraph.number_of_edges(),
                      nx.number_connected_components(associationPeakGraph)))
    cc_graph_list = [associationPeakGraph.subgraph(c)
                     for c in nx.connected_components(associationPeakGraph)]
    counter = 0
    associationLocusList = []
    for cc_graph in cc_graph_list:
        # calculate the connectivity of this component
        ne = cc_graph.number_of_edges()
        nn = cc_graph.number_of_nodes()
        if nn > 1:
            connectivity = ne / float(nn * (nn - 1) / 2)
        else:
            connectivity = 1
        start_ls = []
        stop_ls = []
        association_peak_ls = []
        # get the span of each node, then take the median of all starts/stops
        result_id_set = set()
        chromosome_set = set()  # should contain only one chromosome
        phenotype_id_set = set()
        for n in cc_graph:
            nodeObject = associationPeakGraph.nodes[n]
            chromosome_set.add(nodeObject['chromosome'])
            span = nodeObject['span']
            start_ls.append(span[0])
            stop_ls.append(span[1])
            association_peak_ls.extend(nodeObject['association_peak_ls'])
            result_id_set.add(nodeObject['result_id'])
            phenotype_id_set.add(nodeObject['phenotype_method_id'])
        if len(chromosome_set) > 1:
            sys.stderr.write("Error: %s chromosomes (%s) in one connected component.\n" %
                             (len(chromosome_set), repr(chromosome_set)))
            sys.exit(7)
        median_start = numpy.median(start_ls)
        median_stop = numpy.median(stop_ls)
        no_of_results = len(result_id_set)
        associationLocus = PassingDataList()
        # assign each value separately to impose the order of variables in
        # associationLocus's internal list
        associationLocus.chromosome = chromosome_set.pop()
        associationLocus.start = median_start
        associationLocus.stop = median_stop
        associationLocus.no_of_peaks = nn
        associationLocus.connectivity = connectivity
        associationLocus.no_of_results = no_of_results
        associationLocus.association_peak_ls = association_peak_ls
        phenotype_id_ls = sorted(phenotype_id_set)
        associationLocus.phenotype_id_ls_in_str = utils.getStrOutOfList(phenotype_id_ls)
        # PassingDataList is sortable via (chromosome, start, stop, ...)
        associationLocusList.append(associationLocus)
        counter += 1
    sys.stderr.write("%s association loci.\n" % counter)
    return associationLocusList
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import random

G = nx.read_edgelist("ca-GrQc.txt", comments='#', delimiter='\t',
                     nodetype=int, create_using=nx.Graph())

############## Question 2
# Network Characteristics
print('Number of nodes:', G.number_of_nodes())
print('Number of edges:', G.number_of_edges())
print('Number of connected components:', nx.number_connected_components(G))

# Giant connected component (the largest one)
GCC = G.subgraph(max(nx.connected_components(G), key=len))

# Fraction of nodes and edges in GCC
print("Fraction of nodes in GCC: ", GCC.number_of_nodes() / G.number_of_nodes())
print("Fraction of edges in GCC: ", GCC.number_of_edges() / G.number_of_edges())

#%%
############## Question 3
# Degree
degree_sequence = [d for _, d in G.degree()]
print("Min degree ", np.min(degree_sequence))
print("Max degree ", np.max(degree_sequence))
print("Median degree ", np.median(degree_sequence))
def main(adminIsPoint=False):

    ## Define filepath
    path = os.path.realpath(
        os.path.abspath(
            os.path.split(inspect.getfile(inspect.currentframe()))[0]))
    path = os.path.split(path)[0]

    ## Define dash. This .xlsm includes settings for the criticality script
    dash = os.path.join(path, r'dashboard.xlsm')
    ctrl = pd.read_excel(dash, sheet_name="AGGREGATE", index_col=0)

    ## Define operative district. This parameter can be anything - it is the
    ## sub folder in input/runtime from which files are drawn
    district = ctrl['Weight'].loc['DISTRICT']

    # ensure folders exist
    runtime = os.path.join(path, r'PCS\Criticality\runtime\%s\\' % district)

    ## Add logging
    logging.basicConfig(filename=os.path.join(runtime, "PCS_Criticality.log"),
                        level=logging.INFO,
                        format="%(asctime)s-%(levelname)s: %(message)s")
    logging.info("Starting Criticality Process")
    print("Running: Criticality Analysis on %s. Do not interrupt" % district)

    ## Path Settings
    # outputs
    outpath = os.path.join(path, 'Outputs', '%s' % district)
    for d in [outpath, runtime]:
        if not os.path.isdir(d):
            os.mkdir(d)

    ## Input file settings
    # location of OD
    OD_IN = os.path.join(path, 'PCS\Criticality\Input', '%s' % district)
    # location of administrative boundaries file
    DATA_IN = os.path.join(path, 'PCS\Criticality\Data_Layers')
    inAdmin = os.path.join(DATA_IN, 'Poverty_Communes_2009.shp')
    # road network import. Must be a .csv including geometry information of roads.
    inNetworkFile = os.path.join(OD_IN, 'Network.csv')
    # set WGS 84 coordinate reference system
    crs_in = {'init': 'epsg:4326'}

    # ensure folders exist
    for d in [outpath, runtime, OD_IN]:
        if not os.path.isdir(d):
            os.mkdir(d)

    # error checking - check input data existence
    for curFile in [dash, inNetworkFile, inAdmin, DATA_IN, OD_IN]:
        if not os.path.exists(curFile):
            logging.error("No input found: %s" % curFile)
            raise ValueError("No input found: %s" % curFile)

    # import input dataframes - road network and control dashboard
    inNetwork = pd.read_csv(inNetworkFile)
    ctrldf = pd.read_excel(dash, sheet_name="CRITICALITY", index_col='COL_ID')

    # Inputs
    # setting network shapefile location
    network = os.path.join(runtime, 'Network.shp')

    ## Network Preparation
    # set default iri value as the mean iri of roads for which iri exists
    fillvalue = inNetwork['iri_med'].mean()
    # fill iri value where missing
    inNetwork['TC_iri_med'] = inNetwork['iri_med'].fillna(fillvalue)
    # set cost of traversing segment according to length and IRI, per settings
    # in the excel dashboard
    inNetwork['total_cost'] = inNetwork['length'] * (
        ctrldf['Base_cost_km'][0] +
        (ctrldf['IRI_Coeff'][0] * inNetwork['TC_iri_med']))
    # convert the pandas DataFrame to a GeoDataFrame
    ginNetwork = gpd.GeoDataFrame(inNetwork, crs=crs_in,
                                  geometry=inNetwork['Line_Geometry'].map(shapely.wkt.loads))
    # write the road network out as a Shapefile
    ginNetwork.to_file(network, driver='ESRI Shapefile')
    logging.info("Successfully loaded data")

    # Generate admin boundary centroids
    if not adminIsPoint:
        prepareAdminCentroids(ginNetwork, inAdmin, crs_in,
                              os.path.join(OD_IN, 'adm_centroids.shp'))
        logging.info("Created admin centroids")

    # define function for loading origin files into a dictionary.
    # Parameters are controlled from the dashboard excel.
    def makeOrigin(n, ctrldf):
        origindict = {
            'name': ctrldf['OName'][n],
            'file': os.path.join(path, 'PCS', 'Criticality', 'Input', district,
                                 '%s.shp' % ctrldf['OName'][n]),
            'scalar_column': ctrldf['OScalar'][n]
        }
        return origindict

    # define function for loading destination files into a dictionary.
    # Parameters are controlled from the dashboard excel.
    def makeDestination(n, ctrldf):
        destdict = {
            'name': ctrldf['DName'][n],
            'file': os.path.join(path, 'PCS', 'Criticality', 'Input', district,
                                 '%s.shp' % ctrldf['DName'][n]),
            'penalty': ctrldf['DPenalty'][n],
            'importance': ctrldf['DImportance'][n],
            'annual': ctrldf['DAnnual'][n],
            'scalar_column': ctrldf['DScalar'][n]
        }
        return destdict

    # load origins and destinations into dictionaries, one per input set
    origin_1, origin_2, origin_3, origin_4, origin_5 = (
        makeOrigin(0, ctrldf), makeOrigin(1, ctrldf), makeOrigin(2, ctrldf),
        makeOrigin(3, ctrldf), makeOrigin(4, ctrldf))
    originlist = {
        '%s' % ctrldf['OName'][0]: origin_1,
        '%s' % ctrldf['OName'][1]: origin_2,
        '%s' % ctrldf['OName'][2]: origin_3,
        '%s' % ctrldf['OName'][3]: origin_4,
        '%s' % ctrldf['OName'][4]: origin_5,
    }
    destination_1, destination_2, destination_3, destination_4, destination_5 = (
        makeDestination(0, ctrldf), makeDestination(1, ctrldf),
        makeDestination(2, ctrldf), makeDestination(3, ctrldf),
        makeDestination(4, ctrldf))
    destinationlist = {
        '%s' % ctrldf['DName'][0]: destination_1,
        '%s' % ctrldf['DName'][1]: destination_2,
        '%s' % ctrldf['DName'][2]: destination_3,
        '%s' % ctrldf['DName'][3]: destination_4,
        '%s' % ctrldf['DName'][4]: destination_5,
    }
    logging.info("Opened origins and destinations")

    # Preparation of network via TU Delft code
    gdf_points, gdf_node_pos, gdf = net_p.prepare_centroids_network(
        origin_1['file'], network)
    gdf.to_csv(os.path.join(
        r'C:\Users\charl\Documents\GitHub\Criticality\PCS\Criticality\Runtime\[district_1]',
        'gdf.csv'))
    gdf_node_pos.to_csv(os.path.join(
        r'C:\Users\charl\Documents\GitHub\Criticality\PCS\Criticality\Runtime\[district_1]',
        'gdf_node_pos.csv'))

    # Create Networkx MultiGraph object from the GeoDataFrame
    G = net_p.gdf_to_simplified_multidigraph(gdf_node_pos, gdf, simplify=False)
    # Change the MultiGraph object to a Graph object to reduce computation cost
    G_tograph = net_p.multigraph_to_graph(G)
    logging.info('Loaded road network: number of disconnected components is: %d'
                 % nx.number_connected_components(G_tograph))
    # Observe the properties of the Graph object
    print(nx.info(G_tograph))

    # Take only the largest subgraph with all connected links
    len_old = 0
    for g in (G_tograph.subgraph(c) for c in nx.connected_components(G_tograph)):
        if len(list(g.edges())) > len_old:
            G1 = g
            len_old = len(list(g.edges()))
    G_sub = G1.copy()
    print(nx.info(G_sub))

    # Save the simplified transport network into a GeoDataFrame
    gdf_sub = net_p.graph_to_df(G_sub)
    blank, gdf_node_pos2, gdf_new = net_p.prepare_newOD(origin_1['file'], gdf_sub)

    # Road Network Graph prep
    G2_multi = net_p.gdf_to_simplified_multidigraph(gdf_node_pos2, gdf_new, simplify=False)
    # Dump files to runtime if dump = 1
    Filedump(gdf_new, 'Road_Lines', runtime)
    Filedump(gdf_node_pos2, 'Road_Nodes', runtime)
    G2 = net_p.multigraph_to_graph(G2_multi)
    gdf2 = net_p.graph_to_df(G2)
    nLink = len(G2.edges())

    # open empty lists
    Outputs, cost_list, iso_list = [], [], []

    ## Run the calculateOD function for each combination of origins and
    ## destinations specified in the control excel; append all outputs to the
    ## Outputs, cost_list and iso_list objects just created
    for z in ctrldf.index:
        if (((ctrldf['ComboO'][z]) != 0) & ((ctrldf['ComboD'][z]) != 0) &
                (pd.notnull(ctrldf['ComboO'][z])) & (pd.notnull(ctrldf['ComboD'][z]))):
            Q = int(ctrldf['ComboNumber'][z])
            logging.info('Computing | combination %s as origin and %s as destination '
                         % (ctrldf['ComboO'][z], ctrldf['ComboD'][z]))
            xx = calculateOD(originlist['%s' % ctrldf['ComboO'][z]],
                             destinationlist['%s' % ctrldf['ComboD'][z]],
                             Q, gdf_sub, G2, nLink, gdf2, runtime, ctrldf)
            Outputs.append(xx)
            cost_list.append("Social_Cost_%s" % Q)
            iso_list.append("Isolated_Trips_%s" % Q)

    # drop unnecessary columns
    Output = inNetwork.drop(["geometry", 'TC_iri_med', 'total_cost'], axis=1)
    # for each object in the Outputs list:
    for o_d_calc in range(0, len(Outputs)):
        # merge the objects together; this creates multiple columns showing each scenario
        Output = Output.merge(Outputs[o_d_calc]['summary'], how='left', on='ID')
    # sum across the 'Social_Cost' columns generated in calculateOD for each O-D file combo
    Output['Cost_total'] = Output[cost_list].sum(axis=1)
    # sum across the 'Isolated_Trips' columns generated in calculateOD for each O-D file combo
    Output['Iso_total'] = Output[iso_list].sum(axis=1)
    # generate an overall criticality score for each road, based on user-input
    # weights between isolated trips and disrupted trips
    Output['CRIT_SCORE'] = (
        ctrldf['Disrupt_Weight'][0] * Output['Cost_total'] +
        ctrldf['Isolate_Weight'][0] * Output['Iso_total'])
    # normalize the score for each road to the [0, 1] range
    Output['CRIT_SCORE'] = (
        (Output['CRIT_SCORE'] - Output['CRIT_SCORE'].min()) /
        (Output['CRIT_SCORE'].max() - Output['CRIT_SCORE'].min()))
    logging.info("Calculated PCS Criticality")
    FileOut(Output, 'criticality_output', outpath)
def components(bot, update):
    print('received components')
    global global_graph
    bot.send_message(chat_id=update.message.chat_id,
                     text=str(nx.number_connected_components(global_graph)))
def get_stats(G, output_path=None, all_stats=False):
    """
    Prints or stores some basic statistics about the graph.
    If an output path is provided, the results are written to that file.

    Parameters
    ----------
    G : graph
        A NetworkX graph or digraph.
    output_path : file or string, optional
        File or filename to write. Default is None.
    all_stats : bool, optional
        Sets if all stats or a small subset of them should be shown.
        Computing all stats can be very slow. Default is False.
    """
    # Compute the number of nodes and edges of the graph
    N = len(G.nodes)
    M = len(G.edges)

    # Compute average degree and the number of degree-1 and degree-2 nodes
    degs = np.array(G.degree)[:, 1]
    avgdeg = sum(degs) / N
    counts = collections.Counter(degs)
    degdict = collections.OrderedDict(sorted(counts.items()))
    deg1 = degdict.get(1, 0)
    deg2 = degdict.get(2, 0)

    if all_stats:
        x = np.log(np.array(list(degdict.keys())))    # degrees
        y = np.log(np.array(list(degdict.values())))  # frequencies
        # the power-law coef. is the slope of a linear model fitted to the
        # log-log data, which has a closed-form solution
        plawcoef = np.abs(np.cov(x, y) / np.var(x))[0, 1]
        cc = nx.average_clustering(G)
        dens = nx.density(G)
        if G.is_directed():
            diam = nx.diameter(G) if nx.is_strongly_connected(G) else float('inf')
        else:
            diam = nx.diameter(G)

    # Print or write to file the graph info
    if output_path is None:
        # Print some basic info about the graph
        if G.is_directed():
            num_ccs = nx.number_weakly_connected_components(G)
            Gcc = G.subgraph(max(nx.weakly_connected_components(G), key=len))
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            print("Directed graph")
            print("Num. nodes: {}".format(N))
            print("Num. edges: {}".format(M))
            print("Num. weakly connected components: {}".format(num_ccs))
            print("Num. nodes in largest weakly CC: {} ({} % of total)".format(Ncc, Ncc * 100.0 / N))
            print("Num. edges in largest weakly CC: {} ({} % of total)".format(Mcc, Mcc * 100.0 / M))
        else:
            num_ccs = nx.number_connected_components(G)
            Gcc = G.subgraph(max(nx.connected_components(G), key=len))
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            print("Undirected graph")
            print("Num. nodes: {}".format(N))
            print("Num. edges: {}".format(M))
            print("Num. connected components: {}".format(num_ccs))
            print("Num. nodes in largest CC: {} ({} % of total)".format(Ncc, Ncc * 100.0 / N))
            print("Num. edges in largest CC: {} ({} % of total)".format(Mcc, Mcc * 100.0 / M))
        if all_stats:
            print("Clustering coefficient: {}".format(cc))
            print("Diameter: {}".format(diam))
            print("Density: {}".format(dens))
            print("Power-law coefficient: {}".format(plawcoef))
        print("Avg. node degree: {}".format(avgdeg))
        print("Num. degree 1 nodes: {}".format(deg1))
        print("Num. degree 2 nodes: {}".format(deg2))
        print("Num. self loops: {}".format(nx.number_of_selfloops(G)))
        print("")
    else:
        # Write the info to the provided file
        f = open(output_path, 'w+b')
        if G.is_directed():
            num_ccs = nx.number_weakly_connected_components(G)
            Gcc = G.subgraph(max(nx.weakly_connected_components(G), key=len))
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            f.write("# Directed graph".encode())
            f.write("\n# Num. nodes: {}".format(N).encode())
            f.write("\n# Num. edges: {}".format(M).encode())
            f.write("\n# Num. weakly connected components: {}".format(num_ccs).encode())
            f.write("\n# Num. nodes in largest weakly CC: {} ({} % of total)".format(Ncc, Ncc * 100.0 / N).encode())
            f.write("\n# Num. edges in largest weakly CC: {} ({} % of total)".format(Mcc, Mcc * 100.0 / M).encode())
        else:
            num_ccs = nx.number_connected_components(G)
            Gcc = G.subgraph(max(nx.connected_components(G), key=len))
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            f.write("# Undirected graph".encode())
            f.write("\n# Num. nodes: {}".format(N).encode())
            f.write("\n# Num. edges: {}".format(M).encode())
            f.write("\n# Num. connected components: {}".format(num_ccs).encode())
            f.write("\n# Num. nodes in largest CC: {} ({} % of total)".format(Ncc, Ncc * 100.0 / N).encode())
            f.write("\n# Num. edges in largest CC: {} ({} % of total)".format(Mcc, Mcc * 100.0 / M).encode())
        if all_stats:
            f.write("\n# Clustering coefficient: {}".format(cc).encode())
            f.write("\n# Diameter: {}".format(diam).encode())
            f.write("\n# Density: {}".format(dens).encode())
            f.write("\n# Power-law coefficient: {}".format(plawcoef).encode())
        f.write("\n# Avg. node degree: {}".format(avgdeg).encode())
        f.write("\n# Num. degree 1 nodes: {}".format(deg1).encode())
        f.write("\n# Num. degree 2 nodes: {}".format(deg2).encode())
        f.write("\n# Num. self loops: {}".format(nx.number_of_selfloops(G)).encode())
        f.write("\n".encode())
        f.close()
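# Minimal usage sketch (added), assuming numpy as np, networkx as nx and
# collections are imported as the function expects:
import networkx as nx

G = nx.erdos_renyi_graph(100, 0.05, seed=42)
get_stats(G)                            # print the summary to stdout
get_stats(G, output_path='stats.txt')   # or write the same summary to a file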
def double_GLF_heuristic(radii, simplex_measure, tri, d=2, a_mid=.5,
                         heuristic='minima', opt_method='ampgo',
                         eval_parameters=['geom', 30000], debug=False,
                         opt_kws=dict()):
    """
    Parameters
    ----------
    radii : array of shape [n_centers, n_features]
    simplex_measure : array of shape [n_gaussian_samples, n_features]
        Generated by make_multivariate_gaussians.
    heuristic : string
        Either 'minima' or 'inflection'; selects how the optimal alpha is
        chosen from the fitted double-GLF curve.
    opt_method : string, compatible with lmfit
        Determines the optimization routine for the regression; a global
        optimizer such as ampgo or basinhopping is recommended.
    eval_parameters : list of length 2
        Spacing ('geom' or 'linear') and number of evaluation points.

    Returns
    -------
    optimal_alpha, d_glf_result, R2, n_inflection, flag, message
    """
    orig_x = np.array(radii[np.argsort(radii)])
    fit_x = orig_x
    n_inflection = 0
    fit_y = np.cumsum(simplex_measure[np.argsort(radii)]) / np.sum(simplex_measure)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        d_glf_model = Model(double_glf)
        d_glf_model.set_param_hint('a1', value=0, min=0, max=1, vary=False)
        d_glf_model.set_param_hint('adiff1', value=a_mid, min=0.0005, max=1)
        d_glf_model.set_param_hint('a2', value=.5, min=0, expr='adiff1 + a1')
        d_glf_model.set_param_hint('a3', value=1, min=0, vary=False)
        d_glf_model.set_param_hint('b1', value=1, min=0, max=1000)
        d_glf_model.set_param_hint('b2', value=1, min=0, max=1000)
        d_glf_model.set_param_hint('c1', value=1, min=0.00001, vary=False)
        d_glf_model.set_param_hint('c2', value=1, min=0.00001, vary=False)
        d_glf_model.set_param_hint('q1', value=1, min=0.00001, max=1)
        d_glf_model.set_param_hint('q2', value=1, min=0.00001, max=1)
        d_glf_model.set_param_hint('v1', value=1, min=0.00001, max=100)
        d_glf_model.set_param_hint('v2', value=1, min=0.00001, max=100)
        d_glf_result = d_glf_model.fit(fit_y, x=fit_x, method=opt_method,
                                       nan_policy='propagate', fit_kws=opt_kws)

    # Collect the fitted parameter values once instead of unpacking the
    # eleven parameters by hand for every call below ('adiff1' is a helper
    # parameter, not an argument of double_glf)
    p = {name: par.value for name, par in d_glf_result.params.items()
         if name != 'adiff1'}

    # Use the evaluated fit to calculate r2
    y_pred = double_glf(fit_x, **p)
    R2 = 1 - np.sum((fit_y - y_pred) ** 2) / np.sum((fit_y - np.mean(fit_y)) ** 2)

    if eval_parameters[0] == 'geom':
        x_eval = np.geomspace(np.min(radii), np.max(radii), num=eval_parameters[1])
    elif eval_parameters[0] == 'linear':
        x_eval = np.linspace(np.min(radii), np.max(radii), num=eval_parameters[1])

    # Determine slope and concavity for use in determining the optimal alpha value
    y_eval = double_glf(x_eval, **p)
    y_slope = double_glf_dx(x_eval, **p)
    y_con = double_glf_dx2(x_eval, **p)

    peaks, __ = find_peaks(y_slope, width=2, height=.005)
    maxima, minima = find_extrema(y_slope, threshold=0)
    inflection, __ = find_peaks(np.abs(np.gradient(np.sign(y_con))))

    single_1 = glf(x_eval, p['a1'], p['a2'], p['c1'], p['b1'], p['q1'], p['v1'])
    single_2 = glf(x_eval, 0, p['a3'] - p['a2'], p['c2'], p['b2'], p['q2'], p['v2'])
    plt.plot(x_eval, single_1, linestyle='--')
    plt.plot(x_eval, single_2, linestyle='-.')
    plt.ylabel('% Convex Volume')
    plt.xlabel('Alpha')
    plt.xscale('log')
    plt.legend(['GLF 1', 'GLF 2'])
    if debug:
        plt.show()
    else:
        plt.close()

    plt.figure()
    plt.plot(fit_x, fit_y, linestyle='none', marker='.')
    plt.plot(x_eval, y_eval, linestyle='-.')
    plt.ylabel('% Convex Volume')
    plt.xlabel('Alpha')
    plt.xscale('log')
    # d_glf_result.plot()
    plt.title(opt_method)
    plt.axhline(p['a1'])
    plt.axhline(p['a3'] - p['a2'])
    plt.axhline(p['a3'])
    for i in peaks:
        plt.axvline(x_eval[i], color='r', linestyle='--')
    for i in minima:
        for k in range(len(peaks) - 1):
            if i > peaks[k] and i < peaks[k + 1]:
                plt.axvline(x_eval[i], color='r', linestyle='--')
    for i in inflection:
        plt.axvline(x_eval[i], color='k', linestyle='--')
    if debug:
        plt.show()
    else:
        plt.close()

    fig, axs = plt.subplots(3, 1, sharex=True)
    axs[0].plot(fit_x, fit_y)
    axs[0].plot(x_eval, y_eval)
    axs[1].plot(x_eval, y_slope)
    axs[2].plot(x_eval, y_con)
    axs[0].set_xlabel('Alpha')
    axs[0].set_ylabel('Volume')
    axs[1].set_ylabel('First Derivative')
    axs[1].set_ylim(0, 1.5 * np.min(y_slope[peaks]))
    axs[2].set_ylabel('Second Derivative')
    axs[0].set_xscale('log')
    axs[1].set_xscale('log')
    axs[2].set_xscale('log')
    for i in peaks:
        axs[0].axvline(x_eval[i], color='r', linestyle='--')
        axs[1].axvline(x_eval[i], color='r', linestyle='--')
    for i in minima:
        for k in range(len(peaks) - 1):
            if i > peaks[k] and i < peaks[k + 1]:
                axs[0].axvline(x_eval[i], color='r', linestyle='--')
                axs[1].axvline(x_eval[i], color='r', linestyle='--')
    for i in inflection:
        axs[1].axvline(x_eval[i], color='k', linestyle='--')
        axs[0].axvline(x_eval[i], color='k', linestyle='--')
    if debug:
        plt.show()
    else:
        plt.close()

    optimal_alpha = x_eval[-1] + 1
    if len(peaks) == 1:
        message = 'Convex: Only one region of maximal slope'
        if debug:
            print(message)
        flag = False
        optimal_alpha = x_eval[-1] + 1
    else:
        if heuristic == 'minima':
            n_inflection = 2
            for i in minima:
                for k in range(len(peaks) - 1):
                    if i > peaks[k] and i < peaks[k + 1]:
                        alpha_boolean = alpha_hull.alpha_shape(tri, radii, x_eval[i])
                        edges, vertices = alpha_hull.collect_alpha_edges(tri, alpha_boolean, d=d)
                        G_alpha = network_from_collection(edges, vertices)
                        n_components = nx.number_connected_components(G_alpha)
                        if n_components > 1:
                            message = 'Convex: Slope minima is disjoint'
                            if debug:
                                print(message)
                            flag = False
                            optimal_alpha = x_eval[-1] + 1
                        elif len(vertices) != len(tri.points):
                            message = ('Convex: Slope minima only contains ' +
                                       str(100 * len(vertices) / len(tri.points)) +
                                       ' percent of samples')
                            if debug:
                                print(message)
                            flag = False
                            optimal_alpha = x_eval[-1] + 1
                        else:
                            message = 'Concave: Slope minima contains all samples'
                            if debug:
                                print(message)
                            flag = True
                            optimal_alpha = x_eval[i]
        elif heuristic == 'inflection':
            counter = 1
            for i in inflection[1::]:
                counter += 1
                alpha_boolean = alpha_hull.alpha_shape(tri, radii, x_eval[i])
                edges, vertices = alpha_hull.collect_alpha_edges(tri, alpha_boolean, d=d)
                G_alpha = network_from_collection(edges, vertices)
                n_components = nx.number_connected_components(G_alpha)
                if counter >= 4:
                    message = ('Convex: An inflection point beyond the third that '
                               'contains all the points is most likely noise.')
                    if debug:
                        print(message)
                    flag = False
                    optimal_alpha = x_eval[-1] + 1
                    break
                elif n_components > 1:
                    message = ('Convex: The inflection point, (' + str(counter) +
                               '), at ' + str(i) + ' is disjoint')
                    if debug:
                        print(message)
                    flag = False
                    optimal_alpha = x_eval[-1] + 1
                elif len(vertices) == len(tri.points):
                    message = ('Concave: The inflection point, (' + str(counter) +
                               '), at ' + str(i) + ' contains ' +
                               str(100 * len(vertices) / len(tri.points)) +
                               ' percent of samples')
                    flag = True
                    if debug:
                        print(message)
                    optimal_alpha = x_eval[i]
                    n_inflection = counter
                    break
                elif counter == len(inflection):
                    message = 'Convex: None of the inflection points contained all samples'
                    flag = False
                    if debug:
                        print(message)
                    optimal_alpha = x_eval[-1] + 1

    return optimal_alpha, d_glf_result, R2, n_inflection, flag, message
# (fragment: the body of betweeness(paths, u, v), which counts how many of
# the given shortest paths traverse the edge (u, v))
        for j in range(len(list(i)) - 1):
            if u == i[j] and v == i[j + 1]:
                count += 1
    for i in G.edges():
        if (u == i[0] and v == i[1]) or (u == i[1] and v == i[0]):
            Edges[i] += count
    return len(paths)

# Main function
for i in G.edges():
    count = 0
    for u in G.nodes():
        for v in G.nodes():
            if u != v:
                P = nx.all_shortest_paths(G, u, v)
                count += betweeness(list(P), i[0], i[1])
    Edges[i] = float(Edges[i]) / count

plt.figure("Communities")
print("Betweenness Removed Edges")
for i in reversed(sorted((v, k) for (k, v) in Edges.items())):
    print(str(i[0]) + " " + str(i[1]))
    G.remove_edge(i[1][0], i[1][1])
    if nx.number_connected_components(G) == N:
        break
nx.draw(G)
plt.show()
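# Note (added): networkx already provides edge betweenness scoring, which is
# the standard way to pick edges for this kind of Girvan-Newman removal loop
# and is much faster than the hand-rolled counting above (the normalization
# convention differs slightly, but the highest-scoring edge is what matters).
import networkx as nx

def highest_betweenness_edge(graph):
    b = nx.edge_betweenness_centrality(graph, normalized=False)
    return max(b, key=b.get)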
def gnewman(club, splitTo=2):
    iteration = 0
    # Why check the number of connected components? For an undirected graph,
    # a connected component is a subgraph in which any two vertices are
    # connected to each other by paths. That is exactly what we need here:
    # splitting the graph into subgraphs mathematically represents the
    # splitting of the club.
    while nx.number_connected_components(club) < splitTo:
        # returns the edges with their betweenness weights
        between = nx.edge_betweenness_centrality(club, normalized=False)
        # we want the edges with the highest edge betweenness centrality;
        # there might be ties, so first find the max betweenness
        m = max(between.values())
        # unpack the tuples returned by between.items(): ((u, v), score)
        for (hU, hV), val in between.items():
            # remove every edge whose score ties with the maximum
            if val == m:
                club.remove_edge(hU, hV)
                print("removed edge %s--%s with betweenness score of %f" % (hU, hV, m))
        iteration += 1
        print("-------------------------")
        # this print-out can be uncommented; it shows the same metric two ways
        # print(nx.number_connected_components(club),
        #       len([club.subgraph(c) for c in nx.connected_components(club)]))
    print("total iterations %d for splitting into %d" % (iteration, splitTo))
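# Usage sketch (added): Zachary's karate club is the classic input for this
# routine; note that gnewman mutates the graph in place, so pass a copy if
# the original graph is needed afterwards.
import networkx as nx

club = nx.karate_club_graph()
gnewman(club, splitTo=2)
print(nx.number_connected_components(club))  # 2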