import os
import networkx as nx

def adjlist2gexf(fAdjlist, bIntNode=1):
    '''
    Converts a graph in the adjacency list format to the GEXF format.

    input parameters:
          fAdjlist:   The file name of the adjacency list
          bIntNode:   Indicates if the node type is integer. The default is 1
                      (i.e., nodes are integer type).

    returns:
          None

    output:
          This function generates a GEXF format file with the same name as
          the input file, with a .gexf extension.
    '''
    # first, load the graph
    if bIntNode == 1:
        G = nx.read_adjlist(fAdjlist, nodetype=int)
    else:
        G = nx.read_adjlist(fAdjlist)

    # the output file name
    (fOutRoot, tmpExt) = os.path.splitext(fAdjlist)
    fOut = fOutRoot + '.gexf'

    # write out
    nx.write_gexf(G, fOut)
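# A minimal usage sketch for the converter above; the file names are
# hypothetical, not ones referenced by the original code.
adjlist2gexf('net.adjlist')         # integer node labels -> net.gexf
adjlist2gexf('genes.adjlist', 0)    # string node labels  -> genes.gexf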
def test_adjlist_integers(self):
    (fd, fname) = tempfile.mkstemp()
    G = nx.convert_node_labels_to_integers(self.G)
    nx.write_adjlist(G, fname)
    H = nx.read_adjlist(fname, nodetype=int)
    H2 = nx.read_adjlist(fname, nodetype=int)
    assert_nodes_equal(list(H), list(G))
    assert_edges_equal(list(H.edges()), list(G.edges()))
    os.close(fd)
    os.unlink(fname)
def test_adjlist_graph(self):
    G = self.G
    (fd, fname) = tempfile.mkstemp()
    nx.write_adjlist(G, fname)
    H = nx.read_adjlist(fname)
    H2 = nx.read_adjlist(fname)
    assert_not_equal(H, H2)  # they should be different graph objects
    assert_equal(sorted(H.nodes()), sorted(G.nodes()))
    assert_equal(sorted(H.edges()), sorted(G.edges()))
    os.close(fd)
    os.unlink(fname)
def test_adjlist_digraph(self):
    G = self.DG
    (fd, fname) = tempfile.mkstemp()
    nx.write_adjlist(G, fname)
    H = nx.read_adjlist(fname, create_using=nx.DiGraph())
    H2 = nx.read_adjlist(fname, create_using=nx.DiGraph())
    assert_not_equal(H, H2)  # they should be different graph objects
    assert_nodes_equal(list(H), list(G))
    assert_edges_equal(list(H.edges()), list(G.edges()))
    os.close(fd)
    os.unlink(fname)
def test_adjlist_multidigraph(self):
    G = self.XDG
    (fd, fname) = tempfile.mkstemp()
    nx.write_adjlist(G, fname)
    H = nx.read_adjlist(fname, nodetype=int, create_using=nx.MultiDiGraph())
    H2 = nx.read_adjlist(fname, nodetype=int, create_using=nx.MultiDiGraph())
    assert_not_equal(H, H2)  # they should be different graph objects
    assert_equal(sorted(H.nodes()), sorted(G.nodes()))
    assert_equal(sorted(H.edges()), sorted(G.edges()))
    os.close(fd)
    os.unlink(fname)
def construct_HardThE(fCorr, ffMRI):
    #
    # a function to generate hard-thresholded networks with the same number
    # of edges as rank-thresholded networks.
    #

    # some parameters
    Target_d = [3, 4, 5, 6, 8, 10, 15, 20, 30]

    # Output directory is relative to the fCorr directory
    CorrDir, fCorrMat = os.path.split(fCorr)
    BaseDir, CorrDirName = os.path.split(CorrDir)
    OutBase = os.path.join(BaseDir, 'Adjlist')
    if not os.path.exists(OutBase):
        os.makedirs(OutBase)
    OutDir = os.path.join(OutBase, 'Network_HardThE')
    if not os.path.exists(OutDir):
        os.makedirs(OutDir)

    # directory where rank-thresholded networks are
    RankDir = os.path.join(OutBase, 'Network_RankTh')

    # loading the correlation matrix
    R, NodeInd = NetUtil.load_corrmat_sparse(fCorr, ffMRI)

    # loop for generating hard-thresholded networks
    for d in Target_d:
        print "Generating an equivalent hard thresholded network with d=" + str(d)
        # load the rank-thresholded network to determine the number of edges
        fdNetFile = "Network_d" + str(d) + ".adjlist"
        fdNet = os.path.join(RankDir, fdNetFile)
        tmpG = nx.read_adjlist(fdNet)
        E = len(tmpG.edges())
        # generate the network
        G, RTh = NetUtil.net_builder_HardThE(R, NodeInd, E)
        # save the network
        fNetFile = "Network_EQd" + str(d) + ".adjlist"
        fNet = os.path.join(OutDir, fNetFile)
        nx.write_adjlist(G, fNet)
def netinfo(request):
    """Take uploaded network, find its values, output them"""
    # clean out images so that only the most recent upload displays
    # (to be replaced with session handling)
    formats = ['png', 'svg']
    for f in formats:
        if os.path.isfile(MEDIA_ROOT + '/nets/H.' + f):
            os.remove(MEDIA_ROOT + '/nets/H.' + f)
    if os.path.isfile(MEDIA_ROOT + "/nets/degree_histogram.png"):
        os.remove(os.path.join(MEDIA_ROOT + "/nets/degree_histogram.png"))

    # Generate graph
    # G = nx.petersen_graph()
    # G = nx.path_graph(12)
    # G = nx.random_geometric_graph(50, 0.125)

    # Store the generated graph.
    # path = os.path.join(MEDIA_ROOT, 'nets/test.adjlist')
    # nx.write_gml(G, path)
    G = nx.read_adjlist(MEDIA_ROOT + "/nets/test.adjlist")
    # nx.write_adjlist(G, path)

    nssresult = netstats_simple(G)
    return render_to_response('netinfo.html', nssresult)
def network_analysis(gene_list, network_file, outdir):
    outfn = "%s/output" % outdir
    f = open(outfn, 'w')
    f.write("gene\tdegrees\tbtw_centrality\n")
    network = networkx.read_adjlist(network_file)
    print "Number of edges in input graph: %s" % network.number_of_edges()
    print "Number of nodes in input graph: %s" % network.number_of_nodes()
    subnetwork = network.subgraph(gene_list)
    print "Number of edges in subgraph: %s" % subnetwork.number_of_edges()
    print "Number of nodes in subgraph: %s" % subnetwork.number_of_nodes()
    bwt_central = networkx.betweenness_centrality(subnetwork)
    degrees = subnetwork.degree(gene_list)
    for gene in gene_list:
        # Number of degrees
        if gene in degrees:
            num_degrees = degrees[gene]
        else:
            num_degrees = "NA"
        # Betweenness centrality
        if gene in bwt_central:
            btw_gene = bwt_central[gene]
        else:
            btw_gene = "NA"
        # File with neighbor nodes
        if subnetwork.has_node(gene):
            neighbors = list(networkx.all_neighbors(subnetwork, gene))
            edges = [(unicode(gene), neighbor) for neighbor in neighbors]
            neighbor_networks = networkx.from_edgelist(edges)
            write_networks(neighbor_networks, gene, outdir)
        f.write("%s\t%s\t%s\n" % (gene, num_degrees, btw_gene))
    f.close()
def main():
    gname = sys.argv[1]
    species = sys.argv[2]
    ofname = sys.argv[3]
    ofile = open(ofname, 'wb')
    G = nx.read_adjlist(gname)
    notFound = []
    fs = 'http://www.uniprot.org/uniprot/?query={0}+AND+organism%3A{1}&sort=score&format=fasta&limit=3'
    for i, n in enumerate(G.nodes()):
        try:
            # purely numeric node labels are ORF identifiers
            on = float(n)
            on = "ORF" + n
        except Exception as e:
            on = n
        url = fs.format(on, species)
        print("fetching {0} using {1}".format(on, url))
        req = urllib2.urlopen(url)
        e = firstEnt(req.read())
        if e == "":
            notFound.append(on)
            # raise NameError("Could not find {0} @ {1}".format(n, url))
        else:
            ofile.write(">{0}\n".format(n) + "\n".join(e.split("\n")[1:]))
    ofile.close()
    print("couldn't find {0}".format(notFound))
def main():
    crawl_data_dir = (
        "/media/rna/yahoo_crawl_data/Yahoo-20190406T235503Z-001/Yahoo/yahoo/"
    )
    csv_file = "/media/rna/yahoo_crawl_data/Yahoo-20190406T235503Z-001/Yahoo/URLtoHTML_yahoo_news.csv"
    mapping_file_df = (
        pd.read_csv(csv_file).sort_values(by=["filename", "URL"]).reset_index(drop=True)
    )
    list_of_html_files = glob.glob("{}/*.html".format(crawl_data_dir))

    with open("edgeList.txt", "w") as fh:
        for filepath in list_of_html_files:
            filename = path_leaf(filepath)
            links = get_outgoing_links(filepath)
            filenames_for_url = get_filenames_for_URLs(mapping_file_df, links)
            # connection_matrix.loc[filename, filenames_for_url] += 1
            # connection_matrix.loc[filename, filenames_for_url] = 1
            fh.write("{} {}\n".format(filename, " ".join(filenames_for_url)))

    G = nx.read_adjlist("edgeList.txt", create_using=nx.DiGraph())
    pagerank = nx.pagerank(
        G,
        alpha=0.85,
        personalization=None,
        max_iter=100,
        tol=1e-06,
        nstart=None,
        weight="weight",
        dangling=None,
    )
    with open("external_PageRankFile.txt", "w") as fh:
        for key, value in pagerank.items():
            fh.write("{}/{}={}\n".format(crawl_data_dir, key, value))
def llegir_graf():  # O(V+E)
    # nom = input("Give me a name for the graph: ")  # O(1)
    # nom = "ex1_biconnexe.dat"
    nom = "ex2_no_biconnexe.dat"
    P = nx.read_adjlist(nom, nodetype=int)  # O(V+E)
    return P  # O(1)
def repo_property(repo_file_names, in_pattern):
    """Calculates network properties of repos.

    param
    ----
    repo_file_names: List of file names of repos ('/' replaced with '_').
    in_pattern: Location of the adjacency-list-formatted graph.
        Example: in_pattern = "../data/network/issues/python/{0}.txt"

    return
    ----
    List of tuples (richness, triangles, transitivity).
    """
    property_list = []
    for repo in repo_file_names:
        print repo
        graph = nx.read_adjlist(in_pattern.format(repo))
        p = networkutil.get_network_property(graph)
        property_list.append(p)
    return property_list
def getStats(filepath):
    print("compiling stats for " + filepath)
    Graph = nx.read_adjlist(filepath)
    DegreesList = []
    Degrees = Graph.degree()
    for Degree in Degrees:
        DegreesList.append(Degrees[Degree])
    GraphSize = len(DegreesList)
    DegreesList.sort()

    def richClubCoefficientsFunction():
        return nx.rich_club_coefficient(Graph, normalized=True)

    def richClubCoefficientsNoNormalisationFunction():
        return nx.rich_club_coefficient(Graph, normalized=False)

    def averageShortestPathsFunction():
        return nx.average_shortest_path_length(Graph)

    def average_clusteringFunction():
        return nx.average_clustering(Graph)

    def degree_assortativity_coefficientFunction():
        return nx.degree_assortativity_coefficient(Graph)

    Stats = {}

    def timeStats(label, function):
        Stats[label] = {}
        Stats[label]["times"] = []
        try:
            for iteration in numberGenerator(10):
                print("calculating " + label + " for " + filepath +
                      ", iteration: " + str(iteration))
                StartTime = time.clock()
                Result = function()
                EndTime = time.clock()
                Stats[label]["Result"] = Result
                Stats[label]["times"].append(EndTime - StartTime)
            Stats[label]["averageTime"] = mean(Stats[label]["times"])
        except:
            Stats[label]["Result"] = "uncomputable"
            Stats[label]["averageTime"] = "uncomputable"

    timeStats("RichClubCoefficients", richClubCoefficientsFunction)
    timeStats("RichClubCoefficientsNoNormalisation",
              richClubCoefficientsNoNormalisationFunction)
    timeStats("Assortativity", degree_assortativity_coefficientFunction)
    timeStats("AverageShortestPath", averageShortestPathsFunction)
    timeStats("AverageClustering", average_clusteringFunction)

    RichClubCoefficients = Stats["RichClubCoefficients"]["Result"]
    EightyFirstPercentileDegree = getPercentile(81, DegreesList)
    EightyFirstPercentileDegreeRichClubCoefficient = \
        RichClubCoefficients[EightyFirstPercentileDegree]
    Stats["EightyFirstPercentileDegree"] = EightyFirstPercentileDegree
    Stats["EightyFirstPercentileDegreeRichClubCoefficient"] = \
        EightyFirstPercentileDegreeRichClubCoefficient
    Stats["CoefficientsByPercentile"] = getCoefficientsByPercentile(
        DegreesList, RichClubCoefficients)
    Stats["CoefficientsByPercentileNoNormalisation"] = getCoefficientsByPercentile(
        DegreesList, Stats["RichClubCoefficientsNoNormalisation"]["Result"])
    Stats["GraphSize"] = GraphSize
    return Stats
def main(args):
    directed = False  # (sys.argv[2].upper() == 'DIRECTED')
    isMetis = False
    if "adjlist" in sys.argv[1].split("."):
        isMetis = True
    create_using = nx.DiGraph() if directed else nx.Graph()
    G = None
    if isMetis:
        G = nx.read_adjlist(sys.argv[1], nodetype=int)
    else:
        G = nx.read_edgelist(sys.argv[1], create_using=create_using, nodetype=int)
    # print G.n
    # calculate_centrality_measures(G, create_using, directed)
    # isMetis = False
    comm_n_dict = create_comm_node_mapping(G, sys.argv[3], isMetis)
    n_comm_map = create_node_comm_mapping(comm_n_dict)
    # print comm_n_dict
    # print n_comm_map
    calculate_community_measures(G, comm_n_dict, n_comm_map)
    if (len(sys.argv) == 5):
        calculate_entropy_of_youtube_communities(sys.argv[4], comm_n_dict)
def llegir_graf():
    '''reads the graph from a text file'''
    global Grafo
    nom = raw_input("Enter the name of the graph file: ")
    Grafo = nx.read_adjlist(nom, create_using=nx.DiGraph(), nodetype=int)
    return Grafo
def createMergedGraph(groupSampleDict, processedDataDir, rawModelDir):
    print 'Merging genomes from specified taxonomic group'

    # Loop over the keys of the dictionary, one for each group
    for group in groupSampleDict:
        # Create an empty graph object
        mergedGraph = nx.DiGraph()

        # Read in the graph of the group and merge with the graph from the
        # previous iteration
        for sample in groupSampleDict[group]:
            # Read in adjacency list and convert to digraph object
            myDiGraph = nx.read_adjlist(rawModelDir + '/' + sample + '/' + sample + 'AdjList.txt',
                                        create_using=nx.DiGraph())
            # Append to the previous graph
            mergedGraph = nx.compose(mergedGraph, myDiGraph)

        # Check that the proper output directory exists. If not, create it.
        if not os.path.exists(processedDataDir + '/' + group):
            os.makedirs(processedDataDir + '/' + group)

        nx.write_adjlist(mergedGraph, processedDataDir + '/' + group + '/' + group + 'AdjList.txt')
        nx.write_graphml(mergedGraph, processedDataDir + '/' + group + '/' + group + 'Graph.xml')

    return
def test1():
    f = open('Results/relation_top5.csv', 'rb')
    G = nx.read_adjlist(f, delimiter=',')
    x = nx.pagerank(G, alpha=0.9)
    sort_x = sorted(x.items(), key=lambda item: item[1], reverse=True)
    for a1, a2 in sort_x:
        print(str(a1) + ' : ' + str(a2))
def main(name, divide):
    '''
    old_g = pickle.load(open("/net/data/facebook/facebook-ucsb/Facebook_2008/"+name+"/original_pickles/"+name+".pickle", 'r'))
    new_g = networkx.Graph()
    for node, friends in old_g.adj.iteritems():
        if node not in new_g.nodes():
            new_g.add_node(node)
        for friend in friends.iterkeys():
            new_g.add_node(friend)
            new_g.add_edge(node, friend)
    '''
    # serialize the networkx graph as text files of edgelist
    # into a text file for workers to read
    # networkx.write_edgelist(new_g, "edgelist/"+name, data=False)
    # subprocess.check_call("hdfs dfs -put edgelist/"+name+" edgelist/", shell=True)

    new_g = networkx.read_adjlist(name + "_list.txt")  # Egypt_list is an edge list
    sc = SparkContext(appName="Sorted_removal")

    dataG = json_graph.node_link_data(new_g)
    stringG = json.dumps(dataG)
    originalG = sc.broadcast(stringG)
    edges = sc.textFile("hdfs://scrapper/user/xiaofeng/edgelist/" + name, 192 * 4 * int(divide))
    costs = edges.map(lambda line: line.split(' ')) \
                 .map(lambda edge: edge_to_cost(edge, originalG.value))
    costs.saveAsTextFile("hdfs://scrapper/user/xiaofeng/costs_" + name)
    sc.stop()
    subprocess.check_call("hdfs dfs -get costs_" + name +
                          " /home/xiaofeng/facebook/FacebookProject/costs/", shell=True)
    Reformat("/home/xiaofeng/facebook/FacebookProject/costs/costs_" + name + "/", name)
def timeflow(opts, argv):
    """ Read cluster tracking results and aggregate into a single file """
    g = nx.read_adjlist(argv[0])
    f = sorted(glob.glob(opts.aabbIn))
    N = map(lambda x: map(int, x.split(".")), nx.nodes(g))
    C = dict((t, set()) for t in map(lambda x: x[0], N))
    for (t, l) in N:
        C[t].add(l)

    newMesh = vtk.vtkPolyData()
    newLines = vtk.vtkCellArray()
    newPoints = vtk.vtkPoints()
    newTimeData = vtkIntArray()
    newTimeData.SetName("TimeStep")

    for t in C:
        p = readVTP(f[t])
        # filter cluster labels
        a = p.GetCellData().GetArray("VortexCluster", vtk.mutable(0))
        for (i, l) in enumerate(lineGenerator(p.GetLines())):
            c = a.GetValue(i)
            if c in C[t]:
                newLine = vtkIdList()
                for j in range(0, l.GetNumberOfIds()):
                    newLine.InsertNextId(newPoints.GetNumberOfPoints())
                    newPoints.InsertNextPoint(p.GetPoint(l.GetId(j)))
                newLines.InsertNextCell(newLine)
                newTimeData.InsertNextValue(t)

    newMesh.GetCellData().SetScalars(newTimeData)
    newMesh.SetPoints(newPoints)
    newMesh.SetLines(newLines)
    writeVTK(opts.output, newMesh)
def ex1():
    G = nx.read_adjlist("gr.txt", nodetype=int)
    nodos = []
    for nodo in G.nodes():
        nodos.append((len(G.neighbors(nodo)), nodo, G.neighbors(nodo)))
    # nodos.sort()
    wifi = []
    re = []
    while (len(wifi) != len(G.nodes())):
        n = max(nodos)
        print "while", len(wifi), "!=", len(G.nodes())
        print "max node: ", n[1]
        m = len(wifi)
        print "len(wifi)-->m:", m
        wifi.extend(n[2])
        print "wifi1: ", wifi
        wifi = list(set(wifi))  # remove duplicates
        print "wifi2: ", wifi
        nodos.remove(n)
        print "nodos: ", nodos
        print "if(", m, "<", len(wifi), ")"
        if (m < len(wifi)):  # if we have covered some new node
            re.append(n[1])
            print "RESULT: ", re
            print " "
    print re
def plotdegreedistribution():
    fh = open("../data/adjlistfile_till_year_" + str(year), 'rb')
    G = nx.read_adjlist(fh, create_using=nx.DiGraph())

    indegrees = G.in_degree()  # dictionary node:degree
    invalues = sorted(set(indegrees.values()))
    inhist = [indegrees.values().count(x) for x in invalues]
    nodes = G.number_of_nodes()
    innewhist = [float(x) / nodes for x in inhist]

    outdegrees = G.out_degree()  # dictionary node:degree
    outvalues = sorted(set(outdegrees.values()))
    outhist = [outdegrees.values().count(x) for x in outvalues]
    outnewhist = [float(x) / nodes for x in outhist]

    plt.figure()
    plt.yscale('log')
    plt.xscale('log')
    plt.xlim(0.8, 10000)
    line1, = plt.plot(invalues, innewhist, 'r^', label='Indegree')
    plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
    line2, = plt.plot(outvalues, outnewhist, 'bo', label='Outdegree')
    plt.legend(handler_map={line2: HandlerLine2D(numpoints=2)})
    plt.title('Indegree and Outdegree Distribution till year ' + str(year))
    plt.xlabel('Degree, k')
    plt.ylabel('Fraction of nodes, P(k)')
    plt.savefig('../graphs/Indegree and Outdegree Distribution till year ' + str(year) + '.png')
    plt.close()
def finding_community():
    file_name = "data/amazon/com-amazon."
    print "...reading graph"
    with open(file_name + "ungraph.txt", "rb") as f:
        G = nx.read_adjlist(f, nodetype=int)
    print "...reading communities"
    communities = read_communities(file_name + "all.cmty.txt", G)

    alpha = 1.2
    beta = 0.8
    epsilon = 0.001
    c = communities[10]
    ns = c.subgraph.nodes()
    print ns
    seed = ns[np.random.randint(len(ns))]
    print seed
    founded = detect_community(G, seed, beta, epsilon, alpha)
    print "Found: ", founded.subgraph.nodes()
    nrel, rel, irel = evaluate_f1(c, founded)
    print (nrel, rel, irel)
def main(args):
    G = nx.read_adjlist(args["--graph"])
    leaveOneOut = args["--folds"] == "loo"
    numFolds = None
    if not leaveOneOut:
        numFolds = int(args["--folds"])
    else:
        numFolds = G.size()
    ofname = args["--out"]

    root = ET.Element("cvtest", name="{0}_{1}_test".format(args["--graph"], numFolds))
    edges = G.edges()
    random.shuffle(edges)
    kf = KFold(G.size(), numFolds, indices=True)
    for i, (trainIDs, testIDs) in enumerate(kf):
        tset = ET.SubElement(root, "testset", name="fold_{0}".format(i))
        trainEdges = [edges[t] for t in trainIDs]
        testEdges = [edges[j] for j in testIDs]
        for u, v in testEdges:
            ET.SubElement(tset, "edge", u=u, v=v)

    with open(ofname, 'wb') as ofile:
        ofile.write(ET.tostring(root, pretty_print=True))
def plot_original(pathway):
    G = nx.read_adjlist(pathway + ".adjlist")
    rlist = set()
    clist = set()
    for n in G.nodes():
        if n[0] == 'C':
            clist.add(n)
        else:
            rlist.add(n)

    # c = [random.random()] * nx.number_of_nodes(G)
    # nx.draw_networkx_edges(G, alpha=0.4)
    pos = nx.spring_layout(G)  # positions for all nodes
    nx.draw_networkx_nodes(G, pos,
                           nodelist=rlist,
                           node_size=0.2,
                           node_color='green',
                           alpha=1.0)
    nx.draw_networkx_nodes(G, pos,
                           nodelist=clist,
                           node_color='blue',
                           node_size=1.0,
                           alpha=1.0)
    nx.draw_networkx_labels(G, pos, font_size=0.3, font_color='red')
    nx.draw_networkx_edges(G, pos)

    print len(G.nodes())
    print len(G.edges())
    plt.axis('off')
    plt.savefig("node.svg")
def test_adjlist_delimiter(self):
    fh = io.BytesIO()
    G = nx.path_graph(3)
    nx.write_adjlist(G, fh, delimiter=':')
    fh.seek(0)
    H = nx.read_adjlist(fh, nodetype=int, delimiter=':')
    assert_nodes_equal(list(H), list(G))
    assert_edges_equal(list(H.edges()), list(G.edges()))
def run_Louvain(fNet, fMask, fOutImg, fOutInfo):
    '''
    A wrapper function for network community detection by the Louvain method.
    Only the largest connected component is parcellated into modules.

    input parameters:
          fNet:     the adjacency list filename for the network
          fMask:    the filename for the mask image. Its header is used to
                    create a modular parcellation image
          fOutImg:  the filename for the output image with modular parcellation
          fOutInfo: the filename with information on modules and modularity.

    returns:
          NONE

    output:
          This function generates files recording modular parcellation.
             fOutImg:   Modular parcellation image
             fOutInfo:  Modular parcellation information as a numpy .npz file.
                        It includes:
                           Q:      The modularity Q
                           NMods:  The number of modules
                           ModID:  Module ID
                           NNodes: The number of nodes in a module. In the
                                   same order as ModID
    '''
    # loading the network data
    G = nx.read_adjlist(fNet, nodetype=int)

    # just the largest subgraph
    GC = max(nx.connected_component_subgraphs(G), key=len)

    # computing the best partition
    partition = community.best_partition(GC)

    # calculating the modularity
    Q = community.modularity(partition, GC)

    # converting the partition into arrays
    VoxInd = [int(i) for i in partition.keys()]
    ModInd = np.array(list(partition.values())) + 1  # the module number starts with 1

    # calculating sizes of the modules
    NMods = np.max(ModInd)
    ModID = range(1, NMods + 1)
    NNodes = []
    for iMod in ModID:
        tmpNNodes = len(np.nonzero(ModInd == iMod)[0])
        NNodes.append(tmpNNodes)

    # reading in the mask image header & data
    img_mask = nib.load(fMask)
    X_mask = img_mask.get_data()

    # organizing the output
    Xout = np.zeros_like(X_mask)
    VoxXYZ = np.unravel_index(VoxInd, X_mask.shape)
    Xout[VoxXYZ] = ModInd

    # writing out the image
    modimg = nib.Nifti1Image(Xout, img_mask.get_affine())
    nib.save(modimg, fOutImg)

    # writing out module stats
    np.savez(fOutInfo, Q=Q, NMods=NMods, ModID=ModID, NNodes=NNodes)
def llegit_graf():
    import networkx as nx
    G = nx.Graph()
    # nom = raw_input("Enter the name of the file with the graph: ")
    nom = "graf.txt"
    G = nx.read_adjlist(nom, nodetype=int)
    return G
def reading_graph():
    file_name = "data/youtube/com-youtube.ungraph.txt"
    f = open(file_name, "rb")
    print "... reading the undirected graph from " + file_name
    G = nx.read_adjlist(f)
    f.close()
    print "#nodes: %i  #edges: %i" % (G.number_of_nodes(), G.number_of_edges())
def load_data(adjlist_file, feature_file):
    g = nx.read_adjlist(adjlist_file, create_using=nx.Graph(), nodetype=int)
    features = {}
    f = open(feature_file)
    for line in f:
        n1, n2, f1, f2 = line.strip().split()
        features[(int(n1), int(n2))] = [float(f1), float(f2)]
    f.close()
    return g, features
def reading_communities():
    file_name = "data/dblp/com-dblp."
    with open(file_name + "ungraph.txt", "rb") as f:
        G = nx.read_adjlist(f, nodetype=int)
    communities = read_communities(file_name + "all.cmty.txt", G)
    print "#communities: ", len(communities)
import networkx as nx, community
import pandas as pd

# Import the network
G = nx.read_adjlist(open("soc-Epinions1.txt", "rb"))

# Extract community structure and save it as a data series
partition = pd.Series(community.best_partition(G))

# Find the index of the 10th largest community
top10 = partition.value_counts().index[9]

# Extract the 10th largest community
# Remember that node labels are strings!
subgraph = partition[partition == top10].index.values.astype('str')
F = G.subgraph(subgraph)

# Calculate the network measures
df = pd.DataFrame()
df["degree"] = pd.Series(nx.degree_centrality(F))
df["closeness"] = pd.Series(nx.closeness_centrality(F))
df["betweenness"] = pd.Series(nx.betweenness_centrality(F))
df["eigenvector"] = pd.Series(nx.eigenvector_centrality(F))
df["clustering"] = pd.Series(nx.clustering(F))

# Calculate the correlations
print(df.corr())
#--------------------------------------#
#   Parse Command Line Arguments
#--------------------------------------#
parser = argparse.ArgumentParser(
    description='Visualise basic connections within the mutual graph.')
parser.add_argument('--egos', dest='ego_users', nargs='+', action='store',
                    help='Users to analyse mutuals of.')
args = parser.parse_args()

#--------------------------------------#
#   Reading Graph
#--------------------------------------#
G = nx.read_adjlist("graph.adjlist")

#--------------------------------------#
#   Show only users that are mutual to the given users
#--------------------------------------#
subgraph_nodes = set()
for user in args.ego_users:
    subgraph_nodes.add(user)
    subgraph_nodes.update(G.neighbors(user))

subgraph = G.subgraph(subgraph_nodes)

## Filter by minimum degree
minimum_degree = 2
removed_nodes = [
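# The excerpt above cuts off mid-statement. A minimal sketch of the
# minimum-degree filter it appears to set up (an assumption about intent;
# the original may differ):
removed_nodes = [n for n, d in subgraph.degree() if d < minimum_degree]
subgraph = subgraph.subgraph(n for n in subgraph if n not in removed_nodes)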
def generate_features_pair(uid_pair_list):
    """
    Construct each function pair's block feature map.
    """
    feas_1 = []
    feas_2 = []
    num1 = []
    num2 = []
    node_length = []
    # traverse all the pairs
    count = 0
    for uid_pair in uid_pair_list:
        print uid_pair
        node_vector = []
        block_feature_dic = {}
        with open(os.path.join(config.CVE_FEATURE_DIR, uid_pair[0] + "_fea.csv"), "r") as fp:
            for line in csv.reader(fp):
                if line[0] == "":
                    continue
                # read every block's features
                block_feature = [float(x) for x in (line[1:16])]
                # print line[0], block_feature
                # drop one feature column
                # del block_feature[6]
                block_feature_dic.setdefault(str(line[0]), block_feature)
        graph_cfg = nx.read_adjlist(
            os.path.join(config.CVE_FEATURE_DIR, uid_pair[0] + "_cfg.txt"))
        for node in graph_cfg.nodes():
            node_vector.append(block_feature_dic[node])
        node_length.append(len(node_vector))
        num1.append(len(node_vector))
        node_arr = np.array(node_vector)
        node_str = node_arr.astype(np.string_)
        feas_1.append(",".join(list(itertools.chain.from_iterable(node_str))))

        node_vector = []
        block_feature_dic = {}
        with open(os.path.join(config.FEA_DIR, uid_pair[1] + "_fea.csv"), "r") as fp:
            for line in csv.reader(fp):
                if line[0] == "":
                    continue
                # read every block's features
                block_feature = [float(x) for x in (line[1:16])]
                # drop one feature column
                # del block_feature[6]
                block_feature_dic.setdefault(str(line[0]), block_feature)
        graph_cfg = nx.read_adjlist(
            os.path.join(config.FEA_DIR, uid_pair[1] + "_cfg.txt"))
        for node in graph_cfg.nodes():
            node_vector.append(block_feature_dic[node])
        node_length.append(len(node_vector))
        num2.append(len(node_vector))
        node_arr = np.array(node_vector)
        node_str = node_arr.astype(np.string_)
        feas_2.append(",".join(list(itertools.chain.from_iterable(node_str))))

    num1_re = np.array(num1)
    num2_re = np.array(num2)
    # num1_re = num1_arr.astype(np.string_)
    # num2_re = num2_arr.astype(np.string_)
    return feas_1, feas_2, np.max(node_length), num1_re, num2_re
# Instantiate the graph
G1 = nx.Graph()
# add node/edge pairs
G1.add_edges_from([(0, 1), (0, 2), (0, 3), (0, 5), (1, 3),
                   (1, 6), (3, 4), (4, 5), (4, 7), (5, 8), (8, 9)])

# draw the network G1
nx.draw_networkx(G1)

# Adjacency List
# G_adjlist.txt is the adjacency list representation of G1.
#     0 1 2 3 5   -> node 0 is adjacent to nodes 1, 2, 3, 5
#     1 3 6       -> node 1 is (also) adjacent to nodes 3, 6
#     and so on.
# If we read in the adjacency list using nx.read_adjlist, we can see that it
# matches G1.
G2 = nx.read_adjlist('G_adjlist.txt', nodetype=int)
G2.edges()
nx.draw_networkx(G2)

# Adjacency Matrix
# The elements in an adjacency matrix indicate whether pairs of vertices are
# adjacent or not in the graph. Each node has a corresponding row and column.
# For example, row 0, column 1 corresponds to the edge between node 0 and node 1.
# Reading across row 0, there is a '1' in columns 1, 2, 3, and 5, which
# indicates that node 0 is adjacent to nodes 1, 2, 3, and 5.
G_mat = np.array([[0, 1, 1, 1, 0, 1, 0, 0, 0, 0],
                  [1, 0, 0, 1, 0, 0, 1, 0, 0, 0],
                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [1, 1, 0, 0, 1, 0, 0, 0, 0, 0],
                  [0, 0, 0, 1, 0, 1, 0, 1, 0, 0],
                  [1, 0, 0, 0, 1, 0, 0, 0, 1, 0],
                  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
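# The matrix literal above is cut off before its final row; from the edge
# list at the top, node 9's only neighbor is node 8, so the missing row is
# [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]. A short sketch (assuming a NetworkX
# version whose Graph constructor accepts a NumPy adjacency array) of
# round-tripping the completed matrix back into a graph and checking it
# against G1:
G_mat_full = np.vstack([G_mat, [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]])
G3 = nx.Graph(G_mat_full)  # nx.from_numpy_array(G_mat_full) also works in 2.x+
print(sorted(G3.edges()) == sorted(G1.edges()))  # expect True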
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

##### loading the network data
# Brain (Berlin)
G_Berlin = nx.read_adjlist('DataCentrality/Berlin_sub91116_aal90_d10_annotated.adjlist')
# Brain (Leiden)
G_Leiden = nx.read_adjlist('DataCentrality/Leiden_sub30943_aal90_d10_annotated.adjlist')
# Brain (New York)
G_NY = nx.read_adjlist('DataCentrality/NewYork_sub78118_aal90_d10_annotated.adjlist')
# Brain (Oxford)
G_Oxford = nx.read_adjlist('DataCentrality/Oxford_sub16112_aal90_d10_annotated.adjlist')
# Brain (Queensland)
G_Queen = nx.read_adjlist('DataCentrality/Queensland_sub42533_aal90_d10_annotated.adjlist')

##### eigenvector centrality
Ceig_Berlin = nx.eigenvector_centrality(G_Berlin)
Ceig_Leiden = nx.eigenvector_centrality(G_Leiden)
Ceig_NY = nx.eigenvector_centrality(G_NY)
Ceig_Oxford = nx.eigenvector_centrality(G_Oxford)
Ceig_Queen = nx.eigenvector_centrality(G_Queen)

##### averaging eigenvector centralities
Ceig_Avg = Ceig_Berlin.copy()
listCeig = [Ceig_Berlin, Ceig_Leiden, Ceig_NY, Ceig_Oxford, Ceig_Queen]
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd

DataFile1 = open(r'C:\Users\Kmutt_Wan\PycharmProjects\PPP.txt')
G = nx.read_adjlist(DataFile1)
nx.draw(G, edge_color='b', with_labels=True, edge_label=True)


def dfs(graph, start, end):
    fringe = [(start, [])]
    while fringe:
        state, path = fringe.pop()
        if path and state == end:
            yield path
            continue
        for next_state in graph[state]:
            if next_state in path:
                continue
            fringe.append((next_state, path + [next_state]))


cycles = [[node] + path for node in G for path in dfs(G, node, node)]
print(len(cycles))

list3 = []
for node in cycles:
    if len(node) - 1 == 3:
        list3.append(node)
tuple3 = tuple(list3)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='M-graphs')
    parser.add_argument('--output_directory', type=str, required=False, default="output",
                        help='Directory for generated output.')
    parser.add_argument('--random', type=int, required=False,
                        help='Generate random graph with given N nodes.')
    parser.add_argument('--all', type=int, required=False,
                        help='Generate all graphs with given N nodes.')
    parser.add_argument('--graph_to_color_path', type=str, required=False,
                        help='Path to graph to color.')
    args = parser.parse_args()

    if args.graph_to_color_path is not None:
        G = nx.read_adjlist(args.graph_to_color_path, nodetype=int)
        start = time.time()
        chromatic_number = color_graph(G)
        end = time.time()
        print('chromatic_number: {0}, computed in: {1} seconds'.format(chromatic_number, end - start))

    if args.random is not None:
        G = generate_random_m_graph_with_n_nodes(args)
        start = time.time()
        chromatic_number = color_graph(G)
        end = time.time()
        print('chromatic_number: {0}, computed in: {1} seconds'.format(chromatic_number, end - start))

    if args.all is not None:
        generate_all_m_graphs_with_n_nodes(args)
def readGraph(path):
    # if 'ungraph' in path:
    return nx.read_adjlist(path, nodetype=int)
import matplotlib.pyplot as pl
import networkx as nx
import random as ra

g = nx.read_adjlist('impressed.txt', nodetype=int)
a = nx.adj_matrix(g)

walk = []
m = g.order()
k = ra.randint(1, m)
walk.append(k)
for i in range(1000000):
    n = nx.neighbors(g, k)
    l = ra.sample(n, 1)
    walk.append(l[0])
    k = l[0]
# print walk

p = {}  # my dictionary of pagerank
q = {}  # in-built dictionary of pagerank
for i in range(g.order()):
    p[i + 1] = 0
for i in walk:
    p[i] += 1
print p, 'my_solution'
i = 0
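# The excerpt stops before the comparison its q dict sets up. A minimal
# sketch of a plausible remaining step (an assumption about intent):
# normalize the visit counts and compare rankings with NetworkX's built-in
# PageRank. Note a walk without teleportation approximates the stationary
# distribution, which differs from damped PageRank.
total = float(sum(p.values()))
my_rank = dict((node, cnt / total) for node, cnt in p.items())
q = nx.pagerank(g)  # in-built PageRank for reference
print sorted(my_rank, key=my_rank.get, reverse=True)[:10]
print sorted(q, key=q.get, reverse=True)[:10]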
from operator import itemgetter

import networkx as nx
from platypus import *

G = nx.read_adjlist("input/Ventresca/ForestFire_n2000.txt")
k = 200
num_of_tests = 10


class DfsGenerator(Generator):
    def __init__(self):
        super(DfsGenerator, self).__init__()
        self.step_size = G.number_of_nodes() // k

    def generate(self, problem):
        solution = Solution(problem)
        solution.variables[0] = list(
            nx.dfs_preorder_nodes(G, source=random.choice(list(G))))[::self.step_size]
        return solution


# x - number of nodes with highest degree
def degree_random(x):
    solution = degree_random.nodes_sorted_degree[:x]
    while len(solution) < k:
        node = random.choice(degree_random.nodes_sorted_degree)
        if node not in solution:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

##### loading the network data
# C Elegans neural network
G_CEleg = nx.read_adjlist('DataSmallWorld/CElegans.adjlist')
# Power grid
G_Power = nx.read_gml('DataSmallWorld/power.gml', label='id')
# Brain (ROI)
G_ROI = nx.read_adjlist('DataSmallWorld/Oxford_sub16112_aal90_d5.adjlist')
# Brain (Voxel)
G_Voxel = nx.read_adjlist('DataSmallWorld/Oxford_sub16112_voxel_d20.adjlist')
    fin = open(path, 'r')
    while 1:
        line = fin.readline()
        if line == '':
            break
        vec = line.strip().split(' ')
        node_label_dict[vec[0]] = vec[1:]
    fin.close()
    return node_label_dict


# --------------------------------------------------
# -------------------- test -----------------------
# --------------------------------------------------
if __name__ == '__main__':
    G = nx.read_adjlist(path='cora_adjlist.txt', create_using=nx.Graph())
    # G = nx.read_adjlist(path='cora_doub_adjlist.txt', create_using=nx.DiGraph())
    node_label_dict = read_node_label_downstream(path='cora_label.txt')
    save_any_obj(obj=node_label_dict, path='cora_node_label_dict.pkl')  # {node ID: degree, ...}

    Gs = generate_dynamic_data(G)
    print('len(Gs[-1].nodes())', len(Gs[-1].nodes()))

    adjmatrix = nx.to_numpy_array(G)
    print('Is the graph symmetric? i.e. undirected graph?',
          (adjmatrix == np.transpose(adjmatrix)).all(),
          ' -- note the diff of edge # between directed and undirected nx graph')
    print('len(Gs[-1].edges())', len(Gs[-1].edges()))
    print('np.sum(adjmatrix)', np.sum(adjmatrix))
def graph_degrees(topo_seed):
    graph_file = graph_filename_from_seed(topo_seed)
    graph = nx.read_adjlist(graph_file)
    return [tup[1] for tup in graph.degree()]
import multiprocessing as mp
import statistics

import networkx as nx
from platypus import NSGAII, EpsMOEA, SPEA2, IBEA, PAES, EpsNSGAII, Problem, Subset

G = nx.read_adjlist("input/Ventresca/BarabasiAlbert_n500m1.txt")
k = 50
num_of_tests = 10


def connected_components(exclude=None):
    if exclude is None:
        exclude = set()
    S = set(exclude)
    subgraph = nx.subgraph_view(G, filter_node=lambda n: n not in S)
    return nx.number_connected_components(subgraph)


def cardinality_variance(exclude=None):
    if exclude is None:
        exclude = set()
    S = set(exclude)
    subgraph = nx.subgraph_view(G, filter_node=lambda n: n not in S)
    components = list(nx.connected_components(subgraph))
    num_of_components = len(components)
    num_of_nodes = subgraph.number_of_nodes()
    variance = 0
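# The excerpt ends just after `variance = 0`. A plausible completion, under
# the assumption that the objective is the variance of component sizes around
# the even-split mean |V|/m used in the critical-node-detection literature
# (the original may differ):
def cardinality_variance_sketch(exclude=None):
    S = set(exclude or ())
    sg = nx.subgraph_view(G, filter_node=lambda n: n not in S)
    components = list(nx.connected_components(sg))
    m = len(components)
    mean_size = sg.number_of_nodes() / m
    return sum((len(c) - mean_size) ** 2 for c in components) / m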
def inferMain(options):
    '''
    Main method for the subcommand 'infer'. Performs the parsimonious
    reconstruction by finding the minimum set of non-tree edges in the
    duplication history which explains the extant networks.
    '''
    ## The global variables we will need to set based on the
    ## command line arguments.
    global undirected
    global cc
    global dc

    undirected = options.undirected
    directed = not undirected
    dc = 1.0
    cc = options.cost_ratio

    # Read in the extant networks
    G = nx.read_adjlist(options.target,
                        create_using=nx.Graph() if undirected else nx.DiGraph())

    # Read in the gene tree and convert it to a NetworkX graph
    T = treeToGraph(cogent.LoadTree(options.duplications))
    rv = findRoot(T)

    # Compute some important sets of nodes from the extant network and the tree,
    # specifically the sets of lost and extant nodes
    leaves = set([n for n in T.nodes() if len(T.successors(n)) == 0])
    lostNodes = set(filter(lambda x: x.find('LOST') != -1, leaves))
    extantNodes = leaves - lostNodes

    # Add back any isolated extant nodes (nodes which have no interactions)
    # to the extant network
    isolatedNodes = extantNodes - set(G.nodes())
    if len(isolatedNodes) > 0:
        logging.info("Adding isolated extant nodes {0} to the extant network".format(isolatedNodes))
        G.add_nodes_from(isolatedNodes)

    prepareTree(T, rv, lostNodes)

    blockingLoops = []
    constraints = []
    hasBlockingLoops = True
    SolutionGraph = nx.DiGraph()
    loopCtr = 0
    t = 0
    while (hasBlockingLoops):
        if not len(blockingLoops) == 0:
            l = blockingLoops.pop()
            for e in l.offendingEdges():
                if not (e[0] in leaves and e[1] in leaves):
                    constraints.append(e)
                    break

        logging.info("Round {0}".format(t))
        logging.info("CONSTRAINTS = {0}".format(constraints))
        logging.info("Computing maximally parsimonious network history . . . ")
        cost, nonTreeEdges = parRecon(G, T, rv, lostNodes, constraints)
        nonTreeEdges = {e: d for e, d in nonTreeEdges.iteritems()
                        if isEffectiveEdge(T, e[0], e[1], lostNodes)}
        logging.info("Cost = {0}".format(cost))

        # Augment the tree with auxiliary edges pointing from a
        # node to all of its children
        if options.loop:
            augmentedT = T.copy()
            augmentedT.add_edges_from([(nte[0], nte[1]) for nte in nonTreeEdges] +
                                      [(nte[1], nte[0]) for nte in nonTreeEdges])
            # Take the first blocking loop
            cycle = first_cycle.find_cycle(augmentedT, T)
            blockingLoops = [BlockingLoop(cycle, nonTreeEdges)] if len(cycle) > 0 else []
            nbl = len(blockingLoops)
            hasBlockingLoops = nbl > 0
            if hasBlockingLoops:
                logging.info("Fast cycle checker found blocking loop {0}".format(cycle))
        else:
            hasBlockingLoops = False
        t += 1

    if options.nonTreeEdges is not None:
        with open(options.nonTreeEdges, 'wb') as ofile:
            for nte, d in nonTreeEdges.iteritems():
                u, v = nte
                # We only care about those non-tree edges that are between
                # ancestors of extant nodes which have not been lost
                ofile.write('{0}\t{1}\t{2}\n'.format(u, v, d))
    exit(0)
import networkx as nx
from random import *
import matplotlib.pyplot as plt

# Gnm Graph
n = 10670
m = 22002
print "Creating Gnm Graph..."
gnm = nx.gnm_random_graph(n, m)
print len(gnm.nodes())
print len(gnm.edges())

# Real World Autonomous System Network
print "Reading Real-World graph..."
rw = nx.read_adjlist("oregon1_010331.txt.gz")
print len(rw.nodes())
print len(rw.edges())

# Graph with preferential attachment
print 'Creating preferential attachment graph...'
pag = nx.complete_graph(40)
new_node = 40  # label for the first node to be added
num_edges = len(pag.edges())
edges = pag.edges()
while (len(pag.nodes()) < 10670):
    if len(pag.nodes()) % 100 == 0:
        print len(pag.nodes())
    for i in range(2):  # creating two new edges
        # get random edge from graph
        rand_edge = edges[randint(0, len(edges) - 1)]
def generate_network(graph_file):
    G = nx.read_adjlist(graph_file, nodetype=int)
    for x in list(G.nodes()):
        G.nodes[x]['data'] = agent()
    return G
def additive_fitness_model(out_suffix, track_till, num_threads):
    fit_x = lambda size: np.random.pareto(1, size)
    t = 0  # time counter

    # loading initial graph generated from real data
    start_year = 1960
    end_year = 1965
    file_name = 'aps_initial_graph_' + str(start_year) + '_' + str(end_year)
    g = nx.read_adjlist('./' + file_name, create_using=nx.DiGraph())
    print 'initial graph: nodes:', len(g.nodes()), ', edges:', len(g.edges())

    # Creating a node:[edges] dictionary.
    node_edges_dict = dict()
    with open('node_edge_dict') as f:
        node_edges_dict = json.load(f)

    # Creating a node:year dictionary.
    node_year_dict = dict()
    with open('node_year_dict') as f:
        node_year_dict = json.load(f)

    # Creating a year:nodes dictionary.
    year_node_dict = dict()
    with open('year_node_dict') as f:
        year_node_dict = json.load(f)

    # initializing year attributes for each citation
    for citation in g.edges():
        g.edge[citation[0]][citation[1]]['year'] = 0

    # Assigning fitness attributes for each paper
    for paper in g.nodes():
        g.node[paper]['fitness'] = fit_x(1)[0]
        # print(g.node[paper]['fitness'], g.node[paper]['location'])
    # exit(0)

    try:
        # Sorting nodes according to the year in which they appeared.
        nodes_by_year = sorted(node_year_dict.keys(),
                               key=lambda x: int(node_year_dict[str(x)]))
        # for i in range(10):
        #     print(nodes_by_year[i], node_year_dict[nodes_by_year[i]])

        i = 0
        newEdges = []
        curYear = end_year
        for newNode in nodes_by_year:
            # print("...... {0} ......".format(len(deg_roulette)))
            i += 1
            # print(i)
            if int(node_year_dict[newNode]) <= end_year:
                continue
            if int(node_year_dict[newNode]) > track_till + 10:
                break

            # Changing year.
            if int(node_year_dict[newNode]) != curYear:
                t = t + (int(node_year_dict[newNode]) - curYear)
                print("********* {0} *********".format(t))
                curYear = int(node_year_dict[newNode])

            k = -1
            try:
                k = len(node_edges_dict[newNode])
            except KeyError as ke:
                # If node has no references in real data.
                continue

            newFit = fit_x(1)[0]
            # print('**', k)
            j = 0
            newEdges[:] = []
            node_link_probs = [(g.in_degree(x) + g.node[x]['fitness'], x)
                               for x in g.nodes()]
            link_probs = np.array([l[0] for l in node_link_probs])
            sum_probs = np.sum(link_probs)
            link_probs = link_probs / float(sum_probs)
            newEdges = list(np.random.choice([l[1] for l in node_link_probs],
                                             k, False, link_probs))

            g.add_node(newNode, fitness=newFit)
            for newDest in newEdges:
                g.add_edge(newNode, newDest)
                g.edge[newNode][newDest]['year'] = t
            # print(g.number_of_nodes(), g.number_of_edges())
    except KeyboardInterrupt as e:
        pass

    # saving network in the form of an edge list
    out_papers = './AllPapersSim_' + out_suffix
    out_edges = './TemporalEdgeListSim_' + out_suffix
    f = open(out_papers, 'wb')
    for paper in g.nodes():
        f.write(str(paper) + '\n')
    f.close()
    f = open(out_edges, 'wb')
    for citation in g.edges():
        # print(g.edge[citation[0]][citation[1]]['year'])
        f.write(str(citation[0]) + '\t' + str(citation[1]) + '\t' +
                str(g.edge[citation[0]][citation[1]]['year']) + '\n')
    f.close()
    print 'final graph: nodes:', len(g.nodes()), ', edges:', len(g.edges())
import networkx as nx

# read pkl from channel mentions
# statistics of number of mentions per channel
# compare with featured channel list
from database import *

db = YTDatabase()

DIR = '../../data/'

with db._session_scope(False) as session:
    # num_featured = session.query(FeaturedChannel).count()
    if os.path.isfile(DIR + 'networkx_graph_ytDatabase.adjlist'):
        G = nx.read_adjlist(DIR + 'networkx_graph_ytDatabase.adjlist',
                            create_using=nx.DiGraph())
        num_featured = G.number_of_edges()

    with open(DIR + 'channel_to_channel_mentions_with_ID.pkl', 'rb') as input:
        mentions = pickle.load(input)
    with open(DIR + 'video_to_channel_mentions_with_ID.pkl', 'rb') as input:
        videomentions = pickle.load(input)

    for cid in videomentions:
        for vid in videomentions[cid]:
            mid = session.query(Video.channelID).filter(Video.id == vid).first()[0]
            mentions[mid].append(cid)

    non_emptys = {}
    for m in mentions:
import matplotlib.pyplot as plt
from matplotlib import pylab
import networkx as nx


def draw_graph(graph):
    # There are graph layouts like shell, spring, spectral and random.
    # Spring layout usually looks better, so we're choosing it.
    # I will show some examples later of other layouts
    # graph_pos = nx.graphviz_layout(graph)
    graph_pos = nx.spring_layout(graph, scale=20)
    # graph_pos = nx.spectral_layout(graph)

    # draw nodes, edges and labels
    nx.draw_networkx_nodes(graph, graph_pos, node_size=50, node_color='blue', alpha=0.3)
    nx.draw_networkx_edges(graph, graph_pos)
    # nx.draw_networkx_labels(graph, graph_pos, font_size=3, font_family='sans-serif')

    # show graph
    plt.show()


# G = nx.random_geometric_graph(200, 0.125)
G = nx.Graph()
edges = nx.read_edgelist('edges.txt')
nodes = nx.read_adjlist("nodes.txt")
G.add_edges_from(edges.edges())
G.add_nodes_from(nodes)
draw_graph(G)
    c5.append(d[1][a4[i]])
c5 = np.array(c5)
rc5 = c5.mean()
for i in range(len(a5)):
    c6.append(d[1][a5[i]])
c6 = np.array(c6)
rc6 = c6.mean()
# 4>1>6>2>3>5
# c = np.concatenate((c1, c2, c3, c4, c5, c6), axis=0)

# %% Draw the pipeline risk-level color map with networkx; rendering degrades
# with thousands of data points, so ArcGIS can be used for visualization instead.
# Reload the previously saved adjacency list and node-position files to
# reconstruct the graph.
ba = nx.read_adjlist("1adj.txt", nodetype=int)
pos2 = np.load('1pos.npy').item()
# nx.draw_networkx(ba, pos2, with_labels=False, node_size=10)

# continuous coloring for the traditional model
A = nx.draw_networkx_nodes(ba, pos2, a0, node_size=1, node_color=c1, cmap='Reds')
nx.draw_networkx_nodes(ba, pos2, a1, node_size=1, node_color=c2, cmap='Reds')
nx.draw_networkx_nodes(ba, pos2, a2, node_size=1, node_color=c3, cmap='Reds')
nx.draw_networkx_nodes(ba, pos2, a3, node_size=1, node_color=c4, cmap='Reds')
nx.draw_networkx_nodes(ba, pos2, a4, node_size=1, node_color=c5, cmap='Reds')
nx.draw_networkx_nodes(ba, pos2, a5, node_size=1, node_color=c6, cmap='Reds')
from sklearn.preprocessing import StandardScaler

############################################################
###  COMMUNITIES DETECTION
############################################################

cwd = os.getcwd()

XX = ["rattus_norvegicus", "elegans", "coli", "drosop", "arab", "human",
      "inter_H-Y", "mus_musculus", "rattus_norvegicus", "yeast"]

if not os.path.exists('net-communities'):
    os.makedirs('net-communities')

for i in XX:
    G = nx.read_adjlist(str(cwd) + '/data/edge_lists_ints/' + i + '.txt')
    m = nx.community.greedy_modularity_communities(G)
    c = list(m)
    J = open(cwd + '/net-communities/com-fastgreedy-' + i + '_NX.txt', 'w')
    J.write('# ' + str(len(c)) + ' communities; ' + str(G.number_of_nodes()) + ' elements \n')
    for q in range(len(c)):
        J.write('C' + str(q + 1) + '-' + str(len(c[q])))
        for w in c[q]:
            J.write(' ' + str(w))
        J.write('\n')
    J.close()

############################################################
###  NORMALIZATION OF THE DEEPWALK EMBEDDINGS
############################################################
# To run this script: python yearwise_averageclusteringcoefficient.py
import networkx as nx
import matplotlib.pyplot as plt
import sys

avgclusteringcoef = []
year = []
for x in range(1975, 2005 + 1):
    fh = open("../data/adjlistfile_till_year_" + str(x))
    G = nx.read_adjlist(fh, create_using=nx.DiGraph())
    avgclusteringcoef.append(nx.average_clustering(G.to_undirected()))
    year.append(x)

print avgclusteringcoef
print year

plt.figure()
plt.plot(year, avgclusteringcoef, 'b^')
plt.xlim(1974, 2006)
plt.title('Average Clustering Coefficient vs Year plot')
plt.xlabel('Year')
plt.ylabel('Average Clustering Coefficient')
plt.savefig('../graphs/Average Clustering Coefficient vs Year plot.png')
plt.close()
def selectFromGroudTruthDataFrom2Nei():
    '''Select query nodes and query attributes from the ground-truth data.'''
    path = 'L:/ACQData/groundTruthData/'
    data = 'washington'
    dataName = data + '/' + data
    edgePath = path + dataName + '_graph'
    classFile = open(path + dataName + '_class', 'r')
    labelFile = open(path + dataName + '_nodelabel', 'r')
    queryTimes = 100
    queryFile = open(path + dataName + '_query_2Nei_w3_' + str(queryTimes), 'w')
    wordNum = 3  # number of attributes to select

    # read the graph
    G = nx.read_adjlist(edgePath, nodetype=int)
    # get the degrees
    degreeDict = nx.degree(G)

    # read the community grouping
    communityGroup = defaultdict(list)  # community groups
    for line in classFile.readlines():
        line = line.strip()
        words = line.split()
        communityGroup[words[1]].append(int(words[0]))
    classFile.close()

    # read the node labels
    labelDict = {}
    for line in labelFile.readlines():
        line = line.strip()
        words = line.split()
        labelDict[int(words[0])] = words[1:]  # attributes kept as str
    labelFile.close()

    # count attribute frequencies within each community
    comWordFrequents = {}
    comWordFrequents = comWordFrequents.fromkeys(communityGroup.keys(), {})  # initialize
    comWordNodeGroup = {}
    comWordNodeGroup = comWordNodeGroup.fromkeys(communityGroup.keys(), {})
    for className, nodes in communityGroup.items():
        wordFre = {}
        wordNode = defaultdict(list)
        # for each community, count the word frequencies
        for node in nodes:
            for label in labelDict[node]:
                if wordFre.has_key(label):
                    wordFre[label] += 1
                else:
                    wordFre[label] = 1
                wordNode[label].append(node)
        comWordFrequents[className] = wordFre
        comWordNodeGroup[className] = wordNode

    # within each community, build the query group from the k most frequent
    # keywords and randomly select query nodes
    for className, wordNodeGroup in comWordNodeGroup.items():
        attrFreDict = comWordFrequents[className]
        attrNodeDict = comWordNodeGroup[className]  # for this community

        # select the top wordNum attributes
        selectAttrs = []
        tmp = sorted(attrFreDict.items(), key=lambda d: d[1], reverse=True)[0:wordNum]
        for tuple in tmp:
            selectAttrs.append(tuple[0])  # keep only the attribute

        # choose nodes among those containing these attributes
        nodeSet = set()
        for label in selectAttrs:
            for node in attrNodeDict[label]:
                nodeSet.add(node)

        # choose the highest-degree node among them, to use it and its neighbors
        maxDNode = nodeSet.pop()
        maxD = degreeDict[maxDNode]
        for node in nodeSet:
            if degreeDict[node] > maxD:
                maxDNode = node
                maxD = degreeDict[node]

        # randomly pick neighbors of the highest-degree node;
        # generate queryTimes queries per community
        for i in range(queryTimes):
            count = random.randint(1, maxD - 1)
            # select among the 2-hop neighbors of the highest-degree node
            oneHopNeis = nx.neighbors(G, maxDNode)
            twoHopneis = []
            for n in oneHopNeis:
                twoHopneis.extend(nx.neighbors(G, n))  # 2-hop neighbors
            # the candidate node pool is nodeList
            nodeList = []
            nodeList.extend(oneHopNeis)
            nodeList.extend(twoHopneis)
            selectNodeSet = set()  # the final selected node set
            while len(selectNodeSet) < count and len(nodeList) >= count:
                randIndex = random.randint(0, len(nodeList) - 1)
                selectNodeSet.add(nodeList[randIndex])

            # nodes chosen; write the query to file
            string = 'qn:' + '\t' + str(count) + '\tnode:'
            for n in selectNodeSet:
                string += '\t' + str(n)
            string += '\t' + 'attr:'
            for a in selectAttrs:
                string += '\t' + a
            queryFile.write(string + '\n')
# Name:
# Author: rotem.tal
# Description:
#
import networkx as nx
from matplotlib.pyplot import *

g = nx.read_adjlist("tfid")
nx.draw(g, with_labels=True)
show()
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib as mpl
import math
import random

G = nx.read_adjlist('/Users/qinglingzhang/network_ER200.csv', comments='#',
                    create_using=nx.Graph(), delimiter=',', nodetype=int,
                    encoding='utf-8')
G.remove_node(0)
G0 = G.copy()  # graph not to be assortativity-preserving rewired
G0_assort = nx.degree_assortativity_coefficient(G0)
L = G0.number_of_edges()
# N = len(G)
# g_degrees = list(G.degree().values())
# kmax = max(g_degrees)
rew = L * 100
# rew = L/2.0*math.log(1000000)
n = 0
while n < rew:
    edgelist = nx.edges(G)
    p1 = random.choice(edgelist)
    p2 = random.choice(edgelist)
    a, b, c, d = p1[0], p1[1], p2[0], p2[1]
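    # The loop is cut off after unpacking the two candidate edges. A minimal
    # sketch of a degree-preserving double-edge swap step that could follow
    # (an assumption about intent; nx.double_edge_swap implements the same
    # idea): replace (a, b) and (c, d) with (a, d) and (c, b), provided no
    # self-loops or duplicate edges would be created.
    if len({a, b, c, d}) == 4 and not G.has_edge(a, d) and not G.has_edge(c, b):
        G.remove_edges_from([(a, b), (c, d)])
        G.add_edges_from([(a, d), (c, b)])
        n += 1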
def LoadGraph():
    log.info("started loading graph")
    graph = nx.read_adjlist("friends.adjlist")
    log.info("loaded graph")
    return graph
""" Assignment 3 :- Random Walk implementation and rank comparison of the ranks calculated from Random Walk and from inbuilt pagerank method. Dataset used :- Google Web Graph from Snap Link to Dataset used :- https://snap.stanford.edu/data/web-Google.html Submitted By :- Abhishek Sharma (2015eeb1043) """ import networkx as nx import random # Loaded the graph from given adjacency list txt file of Google Web Graph dataset from Snap. G = nx.read_adjlist('webgraph_dataset_google.txt') # Created empty dictionary to keep track of count of individual nodes. count = {} # Assigned the values for all nodes as 0 initially. for i in list(G.nodes()): count[i] = 0 # curr_node will be the current node in the Random Walk from where we have to move further # to its neighbors or teleport. # It is initialized with some random node from the nodes of the Graph G. curr_node = random.choice(list(G.nodes())) # Random walk will happen until this loop ends. for i in range(10000000): # Below list will keep the neighbors of curr_node.
# finding the leader node in the out-neighbours of any random node
# Group:
# Chintala Tejaswini 2016csb1036
# Aluvala Mamatha 2014csb1006
import networkx as nx
import matplotlib.pyplot as plt
from random import choice

X = nx.read_adjlist("pagerank.txt", nodetype=int, create_using=nx.DiGraph())
# print X.nodes()
# print X.edges()

random_node = choice(list(X.nodes()))
print "Random_Node:" + str(random_node) + "\nOut_Neighbors:"
print X.neighbors(random_node)

l = nx.pagerank(X)
maxnode = X.neighbors(random_node)[0]
for i in X.neighbors(random_node):
    if (l[maxnode] < l[i]):
        maxnode = i
print "Leader in the out neighbours:" + str(maxnode)

nx.draw(X, with_labels=1)
plt.show()
def loadNetwork(f, ext):
    if ext == "gml":
        try:
            return nx.read_gml(f)
        except Exception, e:
            print("Couldn't load " + f + " as gml.")
            return False
    elif ext == "net":
        try:
            return nx.read_pajek(f)
        except Exception, e:
            print("Couldn't load " + f + " as pajek.")
            return False
    else:
        # assume it's just an adjacency list
        try:
            return nx.read_adjlist(f)
        except Exception, e:
            print(e)
            print("Couldn't load " + f + " as adjacency list.")


# This is an interruptible thread that is created by the calcOne() function
# for all of the networkx computations. The purpose of this is to allow very
# long calculations to be interruptible and, in the future, be parallelized.
# It uses a trace that monitors each line of execution and an internal
# `killed` state that can be toggled to instantly kill the thread cleanly
# from within.
class workerThread(threading.Thread):
    def __init__(self, func, args, q):
        threading.Thread.__init__(self)
        self.func = func
        self.args = args
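# The class is cut off after __init__ begins. A minimal sketch of the
# trace-based kill mechanism the comment describes, a common recipe rather
# than the original implementation; KillableThread is a hypothetical name,
# and `import sys` and `import threading` are assumed.
class KillableThread(threading.Thread):
    def __init__(self, func, args):
        threading.Thread.__init__(self)
        self.func = func
        self.args = args
        self.killed = False

    def run(self):
        sys.settrace(self.globaltrace)  # install the trace before working
        self.func(*self.args)

    def globaltrace(self, frame, event, arg):
        # return a local trace function for each new call frame
        return self.localtrace if event == 'call' else None

    def localtrace(self, frame, event, arg):
        if self.killed and event == 'line':
            raise SystemExit()  # unwinds the thread cleanly
        return self.localtrace

    def kill(self):
        self.killed = True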